ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.358
Committed: Tue May 4 17:06:14 2010 UTC (14 years, 11 months ago) by spiga
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_7_3_beta
Changes since 1.357: +10 -3 lines
Log Message:
merge 2.7.1_branch

File Contents

# User Rev Content
1 spiga 1.358
2     __revision__ = "$Id: cms_cmssw.py,v 1.352.4.4 2010/05/04 10:36:18 spiga Exp $"
3     __version__ = "$Revision: 1.352.4.4 $"
4    
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 ewv 1.355 from Downloader import Downloader
12 slacapra 1.1
13 spiga 1.293 from IMProv.IMProvNode import IMProvNode
14 ewv 1.355 from IMProv.IMProvLoader import loadIMProvFile
15 slacapra 1.105 import os, string, glob
16 ewv 1.355 from xml.dom import pulldom
17 slacapra 1.1
18     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Set up a CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' names to the values read from crab.cfg
    ncjobs      -- number of jobs requested to be created (kept in self.ncjobs)
    skip_blocks -- handed unchanged to DataDiscovery
    isNew       -- when true the ParameterSet is rewritten and the input
                   sandbox tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is older
    than 2_1, when datasetpath / pset / script_exe / additional input files
    are missing or malformed, or when publication is requested without
    USER.publish_data_name.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents=0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  self.cfg_params.get('CRAB.use_server',0)
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
    # Sandbox limit in MB; effectively unlimited for server and local schedulers
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting
    self.checkCMSSWVersion()
    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp =='':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower()=='none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if (len(pathFields) < 4):
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
    self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
                      self.cfg_params.get('CMSSW.lumis_per_job',None)

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap=''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1: self.debugWrap='--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs : self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # With neither a dataset nor a pset the job can only run a user script.
    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # for emptiness rather than comparing with ''.
    if self.datasetPath == None and self.pset == None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry, e.g. from a trailing comma: nothing to add
                continue
            dirname = ''
            if not pattern.startswith("/"): dirname = "."
            matched = []
            if pattern.find("*")>-1:
                matched = glob.glob(os.path.join(dirname, pattern))
                if len(matched)==0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                matched.append(pattern)
            for fname in matched:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string, so the stripped value must be stored
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        self.preserveSeeds = [seed.strip() for seed in cfg_params['CMSSW.preserve_seeds'].split(',')]
    if cfg_params.has_key('CMSSW.increment_seeds'):
        self.incrementSeeds = [seed.strip() for seed in cfg_params['CMSSW.increment_seeds'].split(',')]

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if (self.publish_data == 1):
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']
        # NOTE: the check on the length of the publication name (user name,
        # primary dataset, publish_data_name and longest output file name)
        # is currently disabled.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation',0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites']=blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))

        if self.selectNoInput:
            if self.pset == None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators']=self.managedGenerators
                self.conf['generator']=self.generator
        elif self.ads or self.lumiMask or self.lumiParams:
            self.algo = 'LumiBased'
        elif splitByRun ==1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params,self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename= 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset != None: self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
310    
311     def ModifyPset(self):
312     import PsetManipulator as pp
313 ewv 1.335
314     # If pycfg_params set, fake out the config script
315     # to make it think it was called with those args
316     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
317     if pycfg_params:
318     trueArgv = sys.argv
319     sys.argv = [self.pset]
320     sys.argv.extend(pycfg_params.split(' '))
321 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
322 ewv 1.335 if pycfg_params: # Restore original sys.argv
323     sys.argv = trueArgv
324    
325 spiga 1.293 try:
326     # Add FrameworkJobReport to parameter-set, set max events.
327     # Reset later for data jobs by writeCFG which does all modifications
328 ewv 1.295 PsetEdit.maxEvent(1)
329 spiga 1.293 PsetEdit.skipEvent(0)
330     PsetEdit.psetWriter(self.configFilename())
331     ## If present, add TFileService to output files
332 slacapra 1.349 if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output',0)):
333 spiga 1.293 tfsOutput = PsetEdit.getTFileService()
334     if tfsOutput:
335     if tfsOutput in self.output_file:
336 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
337 spiga 1.293 else:
338     outfileflag = True #output found
339     self.output_file.append(tfsOutput)
340 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
341 spiga 1.293 pass
342     pass
343 ewv 1.321 # If present and requested, add PoolOutputModule to output files
344 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
345 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
346     if edmOutput:
347 ewv 1.321 for outputFile in edmOutput:
348     if outputFile in self.output_file:
349 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
350 ewv 1.321 else:
351     self.output_file.append(outputFile)
352     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
353     # not requested, check anyhow to avoid accidental T2 overload
354 slacapra 1.297 else:
355 ewv 1.321 if edmOutput:
356     missedFiles = []
357     for outputFile in edmOutput:
358     if outputFile not in self.output_file:
359     missedFiles.append(outputFile)
360     if missedFiles:
361     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
362     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
363     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
364     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
365     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
366     common.logger.info(msg)
367 spiga 1.322 else :
368 ewv 1.321 raise CrabException(msg)
369 ewv 1.301
370     if (PsetEdit.getBadFilesSetting()):
371     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
372 spiga 1.304 common.logger.info(msg)
373 ewv 1.301
374 slacapra 1.297 except CrabException, msg:
375 spiga 1.304 common.logger.info(str(msg))
376 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
377 spiga 1.293 raise CrabException(msg)
378    
379 gutsche 1.3
380 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
381    
382 slacapra 1.86 import DataDiscovery
383     import DataLocation
384 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
385 gutsche 1.3
386     datasetPath=self.datasetPath
387    
388 slacapra 1.1 ## Contact the DBS
389 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
390 slacapra 1.1 try:
391 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
392 slacapra 1.1 self.pubdata.fetchDBSInfo()
393    
394 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
395 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
396     raise CrabException(msg)
397 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
398 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
399     raise CrabException(msg)
400 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
401 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
402 slacapra 1.1 raise CrabException(msg)
403    
404 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
405 slacapra 1.270 #print self.filesbyblock
406 spiga 1.269 self.conf['pubdata']=self.pubdata
407 gutsche 1.3
408 slacapra 1.1 ## get max number of events
409 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
410 slacapra 1.1
411     ## Contact the DLS and build a list of sites hosting the fileblocks
412     try:
413 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
414 gutsche 1.6 dataloc.fetchDLSInfo()
415 slacapra 1.263
416 slacapra 1.41 except DataLocation.DataLocationError , ex:
417 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
418     raise CrabException(msg)
419 ewv 1.131
420 slacapra 1.1
421 slacapra 1.270 unsorted_sites = dataloc.getSites()
422     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
423     for lfn in self.filesbyblock.keys():
424     if unsorted_sites.has_key(lfn):
425     sites[lfn]=unsorted_sites[lfn]
426     else:
427     sites[lfn]=[]
428    
429 slacapra 1.264 if len(sites)==0:
430 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
431     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
432     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
433 slacapra 1.264 raise CrabException(msg)
434    
435 gutsche 1.35 allSites = []
436     listSites = sites.values()
437 slacapra 1.63 for listSite in listSites:
438     for oneSite in listSite:
439 gutsche 1.35 allSites.append(oneSite)
440 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
441 ewv 1.295
442 gutsche 1.3
443 gutsche 1.92 # screen output
444 spiga 1.354 if self.ads or self.lumiMask:
445     common.logger.info("Requested (A)DS %s has %s block(s)." %
446 ewv 1.350 (datasetPath, len(self.filesbyblock.keys())))
447     else:
448     common.logger.info("Requested dataset: " + datasetPath + \
449     " has " + str(self.maxEvents) + " events in " + \
450     str(len(self.filesbyblock.keys())) + " blocks.\n")
451 gutsche 1.92
452 gutsche 1.35 return sites
453 ewv 1.131
454 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Register the jobs produced by the splitter.

    jobParams  -- ignored; the per-job arguments are taken from self.dict['args']
    firstJobID -- offset added to the job index; job ids are firstJobID+1, ...

    For every job the arguments are collected for the arguments XML file and
    the 'arguments' / 'dlsDestination' fields are stored through
    common._db.updateJob_.

    Raises CrabException when there is no job to create, or when more than
    500 jobs are requested without server mode or a local scheduler.
    """
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # Padded *copy* of the splitter arguments: every job index is valid and
    # self.dict['args'] is not grown by each call.
    jobParams = list(self.dict['args']) + [""] * njobs

    # One mapper for all jobs instead of a new one per job
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID=[]
    listField=[]
    listDictions=[]
    concString = ' '
    exist= os.path.exists(self.argsFile)
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        listID.append(jobNumber)
        jobArgs = jobParams[index]
        str_argu = str(jobNumber)
        if len(jobArgs):
            argu = {'JobID': jobNumber}
            for i in range(len(jobArgs)):
                argu[self.dict['params'][i]]=jobArgs[i]
            if len(jobArgs)==1: self.NumEvents = jobArgs[0]
            # just for debug
            str_argu += concString + concString.join(jobArgs)
            listDictions.append(argu)
        job_ToSave ={}
        job_ToSave['arguments']= '%d %d'%( jobNumber, 0)
        job_ToSave['dlsDestination']= self.jobDestination[index]
        listField.append(job_ToSave)
        msg="Job  %s  Arguments:  %s\n"%(str(jobNumber),str_argu)
        msg+="\t  Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg+="\t  CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)
    # write xml
    if len(listDictions):
        if exist==False: self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
506 ewv 1.313
507 spiga 1.320 # def zipXMLfile(self):
508 ewv 1.313
509 spiga 1.320 # import tarfile
510     # try:
511     # tar = tarfile.open(self.tarNameWithPath, "a")
512     # tar.add(self.argsFile, os.path.basename(self.argsFile))
513     # tar.close()
514     # except IOError, exc:
515     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
516     # msg += str(exc)
517     # raise CrabException(msg)
518     # except tarfile.TarError, exc:
519     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
520     # msg += str(exc)
521     # raise CrabException(msg)
522 ewv 1.325
def CreateXML(self):
    """
    Create the arguments XML file (self.argsFile) holding only the root
    node named self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed before the file is re-read
        outfile.close()
    return
529 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add one 'Job' node per dictionary of listDictions to the arguments
    xml file (self.argsFile); the dictionary items become the node attributes
    """
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname= 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        outfile.close()
    return
544 ewv 1.131
def numberOfJobs(self):
    """
    Return the number of jobs computed by the splitter, or None when
    WMBS automation is enabled (self.automation != 0).
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
551 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball, building it through
    buildTar_ when the file does not exist yet.
    """
    tarballPath = common.work_space.pathForTgz()+self.tgz_name
    self.tgzNameWithPath = tarballPath
    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarballPath
564 slacapra 1.1
565     def buildTar_(self, executable):
566    
567     # First of all declare the user Scram area
568     swArea = self.scram.getSWArea_()
569     swReleaseTop = self.scram.getReleaseTop_()
570 ewv 1.131
571 slacapra 1.1 ## check if working area is release top
572     if swReleaseTop == '' or swArea == swReleaseTop:
573 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
574 slacapra 1.1 return
575    
576 slacapra 1.61 import tarfile
577     try: # create tar ball
578 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
579 slacapra 1.61 ## First find the executable
580 slacapra 1.86 if (self.executable != ''):
581 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
582     if ( not exeWithPath ):
583     raise CrabException('User executable '+executable+' not found')
584 ewv 1.131
585 slacapra 1.61 ## then check if it's private or not
586     if exeWithPath.find(swReleaseTop) == -1:
587     # the exe is private, so we must ship
588 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
589 slacapra 1.61 path = swArea+'/'
590 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
591     if exeWithPath.find(path) >= 0 :
592     exe = string.replace(exeWithPath, path,'')
593 slacapra 1.129 tar.add(path+exe,exe)
594 corvo 1.85 else :
595     tar.add(exeWithPath,os.path.basename(executable))
596 slacapra 1.61 pass
597     else:
598     # the exe is from release, we'll find it on WN
599     pass
600 ewv 1.131
601 slacapra 1.61 ## Now get the libraries: only those in local working area
602 slacapra 1.256 tar.dereference=True
603 slacapra 1.61 libDir = 'lib'
604     lib = swArea+'/' +libDir
605 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
606 slacapra 1.61 if os.path.exists(lib):
607     tar.add(lib,libDir)
608 ewv 1.131
609 slacapra 1.61 ## Now check if module dir is present
610     moduleDir = 'module'
611     module = swArea + '/' + moduleDir
612     if os.path.isdir(module):
613     tar.add(module,moduleDir)
614 slacapra 1.256 tar.dereference=False
615 slacapra 1.61
616     ## Now check if any data dir(s) is present
617 spiga 1.179 self.dataExist = False
618 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
619 slacapra 1.206 while len(todo_list):
620     entry, name = todo_list.pop()
621 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
622 slacapra 1.206 continue
623 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
624 slacapra 1.206 entryPath = entry + '/'
625 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
626 slacapra 1.206 if name == 'data':
627     self.dataExist=True
628 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
629 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
630 slacapra 1.206 pass
631     pass
632 ewv 1.182
633 spiga 1.179 ### CMSSW ParameterSet
634     if not self.pset is None:
635     cfg_file = common.work_space.jobDir()+self.configFilename()
636 ewv 1.357 pickleFile = common.work_space.jobDir()+self.configFilename() + '.pkl'
637 ewv 1.182 tar.add(cfg_file,self.configFilename())
638 ewv 1.357 tar.add(pickleFile,self.configFilename() + '.pkl')
639 ewv 1.313
640 spiga 1.309 try:
641     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
642     tar.add(crab_cfg_file,'crab.cfg')
643     except:
644     pass
645 fanzago 1.93
646 fanzago 1.152 ## Add ProdCommon dir to tar
647 slacapra 1.211 prodcommonDir = './'
648     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
649 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
650 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
651     'WMCore/__init__.py','WMCore/Algorithms']
652 slacapra 1.214 for file in neededStuff:
653     tar.add(prodcommonPath+file,prodcommonDir+file)
654 spiga 1.179
655     ##### ML stuff
656     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
657     path=os.environ['CRABDIR'] + '/python/'
658     for file in ML_file_list:
659     tar.add(path+file,file)
660    
661     ##### Utils
662 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
663 spiga 1.179 for file in Utils_file_list:
664     tar.add(path+file,file)
665 ewv 1.131
666 ewv 1.182 ##### AdditionalFiles
667 slacapra 1.253 tar.dereference=True
668 spiga 1.179 for file in self.additional_inbox_files:
669     tar.add(file,string.split(file,'/')[-1])
670 slacapra 1.253 tar.dereference=False
671 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
672 ewv 1.182
673 slacapra 1.61 tar.close()
674 mcinquil 1.241 except IOError, exc:
675 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
676 spiga 1.304 msg += str(exc)
677     raise CrabException(msg)
678 mcinquil 1.241 except tarfile.TarError, exc:
679 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
680 spiga 1.304 msg += str(exc)
681     raise CrabException(msg)
682 spiga 1.300
683 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
684     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
685 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
686 ewv 1.250 +'MB input sandbox limit \n'
687 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
688     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
689 spiga 1.358 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabServerForUsers#Server_available_for_users'
690 spiga 1.238 raise CrabException(msg)
691 gutsche 1.72
692 slacapra 1.61 ## create tar-ball with ML stuff
693 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script which prepares
    the execution environment for the job 'nj'.

    The returned shell text sets SCRAM_ARCH, runs the middleware
    specific setup, creates the CMSSW project area with scram, checks
    the number of wrapper arguments and finally exports the dataset
    and pset related variables of the job.
    """
    psetName = 'pset.py'
    arch = self.executable_arch
    chunks = []

    # JobType-independent part
    chunks.extend([
        '\n#Written by cms_cmssw::wsSetupEnvironment\n',
        'echo ">>> setup environment"\n',
        'echo "set SCRAM ARCH to ' + arch + '"\n',
        'export SCRAM_ARCH=' + arch + '\n',
        'echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n',
    ])
    chunks.append(self.wsSetupCMSLCGEnvironment_())

    # OSG jobs run inside a freshly created temporary working directory
    chunks.extend([
        'elif [ $middleware == OSG ]; then\n',
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
    ])
    chunks.append(self.wsSetupCMSOSGEnvironment_())

    # SGE, ARC and PBS all reuse the LCG setup
    for middleware in ('SGE', 'ARC', 'PBS'):
        chunks.append('elif [ $middleware == ' + middleware + ' ]; then\n')
        chunks.append(self.wsSetupCMSLCGEnvironment_())
    chunks.append('fi\n')

    # JobType-specific part: create the CMSSW area and load its runtime
    scram = self.scram.commandName()
    chunks.extend([
        '\n\n',
        'echo ">>> specific cmssw setup environment:"\n',
        'echo "CMSSW_VERSION = ' + self.version + '"\n',
        scram + ' project CMSSW ' + self.version + '\n',
        'status=$?\n',
        'if [ $status != 0 ] ; then\n',
        ' echo "ERROR ==> CMSSW ' + self.version + ' not found on `hostname`" \n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
        'cd ' + self.version + '\n',
        'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n',
        'if [ $? != 0 ] ; then\n',
        ' echo "ERROR ==> Problem with the command: "\n',
        ' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
    ])

    # Check the number of arguments handed to the wrapper
    chunks.extend([
        "\n",
        "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n",
        "\n",
        "if [ $nargs -lt " + str(self.argsList) + " ]\n",
        "then\n",
        " echo 'ERROR ==> Too few arguments' +$nargs+ \n",
        ' job_exit_code=50113\n',
        " func_exit\n",
        "fi\n",
        "\n",
    ])

    # Job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        chunks.extend([
            '\n',
            'DatasetPath=' + self.datasetPath + '\n',
            'PrimaryDataset=' + self.primaryDataset + '\n',
            'DataTier=' + self.dataTier + '\n',
            'ApplicationFamily=cmsRun\n',
        ])
    else:
        chunks.extend([
            'DatasetPath=MCDataTier\n',
            'PrimaryDataset=null\n',
            'DataTier=null\n',
            'ApplicationFamily=MCDataTier\n',
        ])

    if self.pset != None:
        # Bring the job configuration (and its pickle) into the CMSSW area
        pset = os.path.basename(job.configFilename())
        pkl = os.path.basename(job.configFilename()) + '.pkl'
        chunks.extend([
            '\n',
            'cp $RUNTIME_AREA/' + pset + ' .\n',
            'cp $RUNTIME_AREA/' + pkl + ' .\n',
            'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n',
            'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n',
            'echo "PreserveSeeds: <$PreserveSeeds>"\n',
            'echo "IncrementSeeds:<$IncrementSeeds>"\n',
            'mv -f ' + pset + ' ' + psetName + '\n',
        ])
    else:
        # No pset: only export the optional extra arguments and event limit
        chunks.append('\n')
        if self.AdditionalArgs:
            chunks.append('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            chunks.append('export MaxEvents=%s\n' % str(self.NumEvents))

    return ''.join(chunks)
805 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job wrapper script which unpacks the user
    tarball from $RUNTIME_AREA and prepends $RUNTIME_AREA to PYTHONPATH.

    When the tarball does not exist on the submitting side only the
    header comment is returned. The argument 'nj' is not used.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = '$RUNTIME_AREA/' + os.path.basename(self.tgzNameWithPath)
    verbose = (self.debug_wrapper == 1)

    txt += 'echo ">>> tar xzf ' + tarball + ' :" \n'
    if verbose:
        txt += 'tar zxvf ' + tarball + '\n'
    else:
        txt += 'tar zxf ' + tarball + '\n'
    # Record the status immediately after tar: any command emitted in
    # between (the debug listing below) would overwrite $?.
    txt += 'untar_status=$? \n'
    if verbose:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
842 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job wrapper script which moves the user
    software (lib, module, optionally src and the additional input
    files) from $RUNTIME_AREA into the current directory, extends
    PYTHONPATH and, when a pset is defined, computes PSETHASH with
    edmConfigHash. The argument 'nj' is not used.
    """
    parts = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    if self.dataExist == True:
        parts.append('rm -r src/ \n')
        parts.append('mv $RUNTIME_AREA/src/ . \n')

    # Additional user files were stored in the tarball by their basename
    for inbox_file in self.additional_inbox_files:
        parts.append('mv $RUNTIME_AREA/' + os.path.basename(inbox_file) + ' . \n')

    parts.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset != None:
        psetName = 'pset.py'

        parts.append('\n')
        if self.debug_wrapper == 1:
            # Dump the configuration and locate edmConfigHash for debugging
            parts.extend([
                'echo "***** cat ' + psetName + ' *********"\n',
                'cat ' + psetName + '\n',
                'echo "****** end ' + psetName + ' ********"\n',
                '\n',
                'echo "***********************" \n',
                'which edmConfigHash \n',
                'echo "***********************" \n',
            ])
        parts.extend([
            'edmConfigHash ' + psetName + ' \n',
            'PSETHASH=`edmConfigHash ' + psetName + '` \n',
            'echo "PSETHASH = $PSETHASH" \n',
            # temporary fix for noEdm files: fall back to a literal "null"
            'if [ -z "$PSETHASH" ]; then \n',
            ' export PSETHASH=null\n',
            'fi \n',
            '\n',
        ])

    return ''.join(parts)
895 slacapra 1.1
896 ewv 1.131
def executableName(self):
    """
    Return the command the wrapper has to run: "sh " when a user
    script (scriptExe) is defined, the configured executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
902 slacapra 1.1
def executableArgs(self):
    """
    Return the arguments for the command given by executableName():
    the user script followed by $NJob and $AdditionalArgs when a
    scriptExe is defined, the job report and pset options otherwise.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
908 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of filenames to be put in the JDL input sandbox:
    the tarball and the arguments file (each only if it exists on
    disk) followed by the job script. The argument 'nj' is not used.
    """
    candidates = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in candidates if os.path.isfile(path)]
    # The job script is always shipped, no existence check is done
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
920    
def outputSandbox(self, nj):
    """
    Return the list of filenames to be put in the JDL output sandbox.

    Every user declared output file (regular and sandbox ones) is
    passed to numberFile() together with the string form of nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
932    
933    
def wsRenameOutput(self, nj):
    """
    Return the part of the job wrapper script which renames the
    produced output files with the $OutUniqueID suffix, defines the
    shell variable file_list and moves back to $RUNTIME_AREA.
    The argument 'nj' is not used.
    """
    debug = (self.debug_wrapper == 1)
    script = [
        '\n#Written by cms_cmssw::wsRenameOutput\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
    ]
    if debug:
        script.append('ls -Al\n')
    script.append('\n')

    for produced in self.output_file:
        numbered = numberFile(produced, '$OutUniqueID')
        script.append('\n')
        script.append('# check output file\n')
        script.append('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            script.append(' mv ' + produced + ' ' + numbered + '\n')
            script.append(' ln -s `pwd`/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            script.append(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
            script.append(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        script.append('else\n')
        script.append(' job_exit_code=60302\n')
        script.append(' echo "WARNING: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            # On OSG a placeholder file is written in place of the missing output
            script.append(' if [ $middleware == OSG ]; then \n')
            script.append(' echo "prepare dummy output file"\n')
            script.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            script.append(' fi \n')
        script.append('fi\n')

    # Comma separated list of the numbered files, located in $SOFTWARE_DIR
    numbered_paths = [numberFile('$SOFTWARE_DIR/' + produced, '$OutUniqueID')
                      for produced in self.output_file]
    script.append('file_list="' + ','.join(numbered_paths) + '"\n')
    script.append('\n')
    script.append('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    script.append('echo ">>> current directory content:"\n')
    if debug:
        script.append('ls -Al\n')
    script.append('\n')
    script.append('cd $RUNTIME_AREA\n')
    script.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(script)
980    
def getRequirements(self, nj=[]):
    """
    Return the job requirements expression to add to the jdl files.

    The expression is the ' && ' conjunction of: the software tag of
    the CMSSW version (if set), the tag of the architecture (if set),
    the outbound connectivity flag and, for the glite scheduler only,
    a condition on the CE state which also accepts "Special" when
    GRID.use_cream is set in the configuration.

    The clauses are joined instead of concatenated so that the
    expression never starts with a dangling ' && ' when the version
    is empty. The argument 'nj' is not used (its default is kept for
    interface compatibility and is never modified).
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() in ["glite"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept: it is part of the text produced so far
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
1004 gutsche 1.3
def configFilename(self):
    """ return the config filename: the job type name plus '.py' """
    base = self.name()
    return base + '.py'
1008 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job wrapper script which prepares the CMS
    environment on OSG: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for the configured CMSSW
    version, exiting with code 10020 when that file is missing.
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ', arch, '"\n',
        ' export SCRAM_ARCH=', arch, '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ', self.version, '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
1032 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job wrapper script which prepares the CMS
    environment on LCG-like middleware: it exports SCRAM_ARCH and
    BUILD_ARCH and sources $VO_CMS_SW_DIR/cmsset_default.sh, exiting
    with code 10031, 10020 or 10032 when the software directory is
    not set, the file is missing or sourcing it fails.
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ', arch, ' ###"\n',
        ' export SCRAM_ARCH=', arch, '\n',
        ' export BUILD_ARCH=', arch, '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(pieces)
1066 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job wrapper script which modifies the
    FrameworkJob Report through ModifyJobReport.py.

    An empty string is returned unless copy_data is 1. When
    publish_data is 1 the processed dataset name is also exported
    and passed to the script. The argument 'nj' is not used.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ] || [ $StageOutExitStatus -eq 60308 ] ; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "SE = $SE"\n',
        # SE_PATH is taken from the stage-out endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $OutUniqueID for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list'
    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args = args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modifier + ' ' + args + '"\n',
        modifier + ' ' + args + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1112 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job wrapper script which parses the
    FrameworkJobReport (crab_fjr_$NJob.xml) with parseCrabFjr.py to
    obtain the exit status of the executable.

    The generated script stops the job through func_exit when the
    status is different from 0, 50115, 50117 and 30001; in every other
    case it records the status in $RUNTIME_AREA/$repo and in
    job_exit_code.
    """
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml'

    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n'
    if self.debug_wrapper == 1:
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`' + parser + ' --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    # message fixed: "not available." used to be printed twice
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    txt += ' if [ $executable_exit_status -eq 0 ];then\n'
    txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
    # The verification of the processed input files (exit code 30001)
    # was already disabled here because it cannot work with the current
    # job arguments; the dead code has been dropped.
    txt += ' fi\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
    txt += ' echo ">>> Executable failed $executable_exit_status"\n'
    txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += ' job_exit_code=$executable_exit_status\n'
    txt += ' func_exit\n'
    txt += 'fi\n\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1185    
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in the job type parameters """
    params = self._params
    params[param] = value
1188    
def getParams(self):
    """ return the job type parameters (the stored object, not a copy) """
    params = self._params
    return params
1191 gutsche 1.8
def outList(self,list=False):
    """
    Return the part of the job wrapper script which declares the
    files expected in the output sandbox (shell variable filesToCheck).

    A warning is logged when no output file is defined, and the task
    is updated in the database with the output file base names.
    When 'list' is true the list of output files is returned instead
    of the script text.
    """
    if len(self.output_file) <= 0:
        warning = "WARNING: no output files name have been defined!!\n"
        warning += "\tno output files will be reported back/staged\n"
        common.logger.info(warning)

    expected = []
    if self.return_data == 1:
        expected.extend([numberFile(name, '$OutUniqueID') for name in self.output_file])
    expected.extend([numberFile(name, '$NJob') for name in self.output_file_sandbox])
    # stdout and stderr of the job close the list
    expected.append('CMSSW_$NJob.stdout')
    expected.append('CMSSW_$NJob.stderr')

    joined = ' '.join(expected)
    txt = ''.join([
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: ' + joined + '"\n',
        'filesToCheck="' + joined + '"\n',
        'export filesToCheck\n',
    ])

    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return txt
1223 ewv 1.355
def checkCMSSWVersion(self, url = "https://cmstags.cern.ch/cgi-bin/CmsTC/", fileName = "ReleasesXML"):
    """
    Compare the current CMSSW release and architecture with the
    allowed releases listed in the XML file 'fileName' fetched
    from 'url'.

    Return True when a project of type 'Production' in state
    'Announced' matches both self.version and self.executable_arch,
    False otherwise. Download and parsing problems are only logged:
    they never raise and result in False.
    """
    downloader = Downloader(url)

    try:
        result = downloader.config(fileName)
    except Exception:
        common.logger.info("ERROR: Problem reading file of allowed CMSSW releases.")
        # Nothing to parse: stop here instead of failing later on an
        # unbound 'result' and logging a misleading parsing problem.
        return False

    try:
        arch = None
        release = None
        for (event, node) in pulldom.parseString(result):
            if event == pulldom.START_ELEMENT:
                if node.tagName == 'architecture':
                    arch = node.attributes.getNamedItem('name').nodeValue
                if node.tagName == 'project':
                    relType = node.attributes.getNamedItem('type').nodeValue
                    relState = node.attributes.getNamedItem('state').nodeValue
                    if relType == 'Production' and relState == 'Announced':
                        release = node.attributes.getNamedItem('label').nodeValue
            if self.executable_arch == arch and self.version == release:
                return True
    except Exception:
        common.logger.info("Problems parsing file of allowed CMSSW releases.")
        return False

    # The whole list was scanned without finding the release
    msg = "WARNING: %s on %s is not a supported release. " % \
          (self.version, self.executable_arch)
    msg += "Submission may fail."
    common.logger.info(msg)
    return False
1266