ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.347
Committed: Wed Dec 23 14:29:34 2009 UTC (15 years, 4 months ago) by ewv
Content type: text/x-python
Branch: MAIN
Changes since 1.346: +11 -8 lines
Log Message:
Set SCRAM_ARCH on all middlewares

File Contents

# User Rev Content
1 ewv 1.327
2 ewv 1.347 __revision__ = "$Id: cms_cmssw.py,v 1.346 2009/12/15 13:13:41 farinafa Exp $"
3     __version__ = "$Revision: 1.346 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure a CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' names to values
    ncjobs      -- number of jobs requested; stored as self.ncjobs
    skip_blocks -- stored as self.skip_blocks (passed on to
                   DataDiscovery by DataDiscoveryAndLocation)
    isNew       -- when true, the parameter set is modified and the
                   input sandbox tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is
    older than 2_1, when the datasetpath is empty or malformed, when the
    PSet / script_exe / an additional input file is missing, or when
    publication is requested without USER.publish_data_name.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    self.local = common.scheduler.name().upper() in ['LSF', 'CAF', 'CONDOR', 'SGE', 'PBS']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    # add fjr report by default via sandbox
    self.output_file_sandbox = [self.fjrFileName]

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # truthiness: with no dataset and no PSet a script is mandatory.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # stray or trailing comma: nothing to add
                continue
            dirname = ''
            if not pattern.startswith("/"):
                dirname = "."
            if pattern.find("*") > -1:
                matched = glob.glob(os.path.join(dirname, pattern))
                if len(matched) == 0:
                    raise CrabException("No additional input file found with this pattern: " + pattern)
            else:
                matched = [pattern]
            for path in matched:
                if not os.path.exists(path):
                    raise CrabException("Additional input file not found: " + path)
                self.additional_inbox_files.append(path.strip())
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    for key, seedList in (('CMSSW.preserve_seeds', self.preserveSeeds),
                          ('CMSSW.increment_seeds', self.incrementSeeds)):
        if key in cfg_params:
            for seed in cfg_params[key].split(','):
                # keep the stripped name (str.strip() returns a new string)
                seedList.append(seed.strip())

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if 'USER.publish_data_name' not in cfg_params:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = cfg_params['USER.publish_data_name']
        # NOTE: a length check on the published dataset name (primary
        # dataset < 100 chars, processed name bounded by 450 minus the
        # user name, primary name and longest output file name) used to
        # sit here inside a string literal, i.e. it was never executed.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation', 0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites'] = blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators'] = self.managedGenerators
                self.conf['generator'] = self.generator
        elif self.ads:
            self.algo = 'LumiBased'
        elif splitByRun == 1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params, self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
303 spiga 1.293
304    
def ModifyPset(self):
    """
    Load the user's parameter set, write the job copy of it and collect
    the output files it declares.

    The PSet is loaded through PsetManipulator. If CMSSW.pycfg_params is
    set, sys.argv is temporarily replaced by [pset] + those parameters
    while the PSet is loaded, and always restored afterwards.

    TFileService output is appended to self.output_file unless
    CMSSW.skip_TFileService_output is set. PoolOutputModule files are
    appended when CMSSW.get_edm_output is set; otherwise any such file
    missing from self.output_file is an error unless
    CMSSW.ignore_edm_output is set.

    Raises CrabException if any step reports a CrabException.
    """
    import sys
    import PsetManipulator as pp

    # If pycfg_params set, fake out the config script
    # to make it think it was called with those args
    pycfg_params = self.cfg_params.get('CMSSW.pycfg_params', None)
    trueArgv = sys.argv
    if pycfg_params:
        # split() (not split(' ')) so repeated blanks do not produce
        # empty argv entries
        sys.argv = [self.pset] + pycfg_params.split()
    try:
        PsetEdit = pp.PsetManipulator(self.pset)
    finally:
        # Restore original sys.argv even if loading the PSet failed
        sys.argv = trueArgv

    try:
        # Add FrameworkJobReport to parameter-set, set max events.
        # Reset later for data jobs by writeCFG which does all modifications
        PsetEdit.maxEvent(1)
        PsetEdit.skipEvent(0)
        PsetEdit.psetWriter(self.configFilename())

        ## If present, add TFileService to output files
        if not int(self.cfg_params.get('CMSSW.skip_TFileService_output', 0)):
            tfsOutput = PsetEdit.getTFileService()
            if tfsOutput:
                if tfsOutput in self.output_file:
                    common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
                else:
                    self.output_file.append(tfsOutput)
                    common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")

        edmOutput = PsetEdit.getPoolOutputModule()
        if int(self.cfg_params.get('CMSSW.get_edm_output', 0)):
            # If present and requested, add PoolOutputModule to output files
            for outputFile in edmOutput or []:
                if outputFile in self.output_file:
                    common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
                else:
                    self.output_file.append(outputFile)
                    common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
        elif edmOutput:
            # not requested, check anyhow to avoid accidental T2 overload
            missedFiles = [f for f in edmOutput if f not in self.output_file]
            if missedFiles:
                msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
                msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
                msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
                if int(self.cfg_params.get('CMSSW.ignore_edm_output', 0)):
                    msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
                    common.logger.info(msg)
                else:
                    raise CrabException(msg)

        if PsetEdit.getBadFilesSetting():
            msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
            common.logger.info(msg)

    except CrabException:
        # sys.exc_info() avoids the version-specific "except X, e" /
        # "except X as e" spellings
        common.logger.info(str(sys.exc_info()[1]))
        msg = 'Error while manipulating ParameterSet (see previous message, if any): exiting...'
        raise CrabException(msg)
372    
373 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query data discovery (DBS) and data location (DLS) for
    self.datasetPath.

    Sets self.pubdata, self.filesbyblock, self.maxEvents and
    self.conf['pubdata'].

    Returns a dictionary with one entry per key of self.filesbyblock;
    the value is whatever dataloc.getSites() holds for that key, or []
    when it holds nothing.

    Raises CrabException when discovery or location fails, or when
    there is no block at all.
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.log(10-1, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.info("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # sys.exc_info() avoids the version-specific "except X, e" /
        # "except X as e" spellings
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.conf['pubdata'] = self.pubdata

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    # One entry per block; blocks unknown to the location service get [].
    unsorted_sites = dataloc.getSites()
    sites = {}
    for block in self.filesbyblock.keys():
        if block in unsorted_sites:
            sites[block] = unsorted_sites[block]
        else:
            sites[block] = []

    # NOTE(review): every block has an entry in `sites`, so this fires
    # only when the dataset has no block at all, not when all blocks lack
    # a location as the message suggests -- confirm the intent.
    if len(sites) == 0:
        msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
        msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
        msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
        raise CrabException(msg)

    # screen output
    common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
441 ewv 1.131
442 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Register the jobs computed by the splitter (self.dict).

    jobParams  -- ignored; the per-job arguments are taken from
                  self.dict['args']
    firstJobID -- offset added to the job index; job numbers are
                  index + firstJobID + 1

    For each job the arguments and destination are saved through
    common._db.updateJob_; jobs that have arguments also get an entry
    in the arguments XML file (created if it did not exist on entry).

    Raises CrabException when there are zero jobs, or more than 500
    jobs without server mode or a local scheduler.
    """
    # Work on a copy so padding below does not grow the splitter's list
    # each time split() is called.
    jobParams = list(self.dict['args'])
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # make sure every job index has an (possibly empty) argument entry
    if len(jobParams) < njobs:
        jobParams.extend([""] * (njobs - len(jobParams)))

    # the SE -> CMS site name map is the same for every job
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    concString = ' '
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        params = jobParams[index]
        destination = self.jobDestination[index]
        listID.append(jobNumber)

        # just for debug
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for i in range(len(params)):
                argu[self.dict['params'][i]] = params[i]
            if len(params) == 1:
                self.NumEvents = params[0]
            str_argu += concString + concString.join(params)
            listDictions.append(argu)

        job_ToSave = {}
        job_ToSave['arguments'] = '%d %d'%(jobNumber, 0)
        job_ToSave['dlsDestination'] = destination
        listField.append(job_ToSave)

        msg = "Job %s Arguments: %s\n"%(str(jobNumber), str_argu)
        msg += "\t Destination: %s "%(str(destination))
        SEDestination = [cms_se[dest] for dest in destination]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)

    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
494 ewv 1.313
495 spiga 1.320 # def zipXMLfile(self):
496 ewv 1.313
497 spiga 1.320 # import tarfile
498     # try:
499     # tar = tarfile.open(self.tarNameWithPath, "a")
500     # tar.add(self.argsFile, os.path.basename(self.argsFile))
501     # tar.close()
502     # except IOError, exc:
503     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
504     # msg += str(exc)
505     # raise CrabException(msg)
506     # except tarfile.TarError, exc:
507     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
508     # msg += str(exc)
509     # raise CrabException(msg)
510 ewv 1.325
def CreateXML(self):
    """
    Create the arguments file self.argsFile containing only the root
    node, named self.rootArgsFilename.
    """
    result = IMProvNode(self.rootArgsFilename)
    # close the handle explicitly instead of leaving it to the
    # garbage collector
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        outfile.close()
    return
517 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary of listDictions to the xml file
    self.argsFile; the dictionary items are passed to IMProvNode as
    keyword arguments. The file is loaded, extended and rewritten.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    # close the handle explicitly instead of leaving it to the
    # garbage collector
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        outfile.close()
    return
533 ewv 1.131
def numberOfJobs(self):
    """
    Return the number of jobs found by the splitter, or None when
    WMBS automation is switched on (self.automation != 0).
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
540 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the TarBall with lib and exe.

    The path is recomputed from the work space and stored in
    self.tgzNameWithPath. An already existing file is returned as is;
    otherwise buildTar_(exe) is called first and the stripped path is
    returned.
    """
    tarball = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
553 slacapra 1.1
def buildTar_(self, executable):
    """
    Build the gzipped input sandbox tarball self.tgzNameWithPath.

    Nothing is built when the SCRAM working area is the release top (or
    the release top is empty). Otherwise the tarball receives, in order:
    the user executable if it is not part of the release, the lib and
    module directories of the working area, every 'data' directory
    found under src, the job parameter set, crab.cfg (best effort), the
    ProdCommon/IMProv/WMCore pieces, the monitoring and utility scripts
    from $CRABDIR/python, and the additional input files.

    executable -- name handed to scram.findFile_ to locate the user
                  executable

    Raises CrabException when the executable is not found, when the
    tarball cannot be written, or when it is larger than
    self.MaxTarBallSize megabytes.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                ## (an exe from the release will be found on the WN)
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))

            ## Now get the libraries: only those in local working area
            tar.dereference = True
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug("lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)
            tar.dereference = False

            ## Now check if any data dir(s) is present
            self.dataExist = False
            srcDir = swArea+"/src"
            todo_list = [(i, i) for i in os.listdir(srcDir)]
            while todo_list:
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                entryFull = srcDir+"/"+entry
                if not os.path.isdir(entryFull):
                    continue
                todo_list += [(entry + '/' + i, i) for i in os.listdir(entryFull)]
                if name == 'data':
                    self.dataExist = True
                    common.logger.debug("data "+entry+" to be tarred")
                    tar.add(entryFull, "src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file, self.configFilename())

            # crab.cfg is shipped on a best-effort basis
            try:
                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
                tar.add(crab_cfg_file, 'crab.cfg')
            except (IOError, OSError, tarfile.TarError):
                common.logger.debug("crab.cfg not added to tar-ball: "+str(sys.exc_info()[1]))

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
                           'WMCore/__init__.py','WMCore/Algorithms']
            for item in neededStuff:
                tar.add(prodcommonPath+item, prodcommonDir+item)

            ##### ML stuff
            ML_file_list = ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            path = os.environ['CRABDIR'] + '/python/'
            for item in ML_file_list:
                tar.add(path+item, item)

            ##### Utils
            Utils_file_list = ['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
            for item in Utils_file_list:
                tar.add(path+item, item)

            ##### AdditionalFiles
            tar.dereference = True
            for item in self.additional_inbox_files:
                tar.add(item, item.split('/')[-1])
            tar.dereference = False
            common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # close on every path, including errors raised above
            tar.close()
    except (IOError, OSError, tarfile.TarError):
        # OSError covers os.listdir()/tar.add() on a missing path.
        # sys.exc_info() avoids the version-specific "except X, e" /
        # "except X as e" spellings.
        msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
        msg += str(sys.exc_info()[1])
        raise CrabException(msg)

    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
            +'MB input sandbox limit \n'
        msg += ' and not supported by the direct GRID submission system.\n'
        msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
        msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServerForUsers#Server_available_for_users'
        raise CrabException(msg)
678 gutsche 1.72
679 slacapra 1.61 ## create tar-ball with ML stuff
680 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script that prepares the
    execution environment for the job 'nj'.

    The text is assembled in three stages:
      * the middleware switch (LCG/CAF/LSF, OSG, SGE, ARC, PBS), which
        embeds the output of wsSetupCMSLCGEnvironment_() or
        wsSetupCMSOSGEnvironment_();
      * the CMSSW project area creation and runtime setup through scram;
      * the job-specific variables: dataset description, then either the
        parameter-set handling (when self.pset is set) or the optional
        AdditionalArgs / MaxEvents exports.
    """
    psetName = 'pset.py'
    arch = self.executable_arch

    # --- JobType-independent part --------------------------------------
    pieces = [
        '\n#Written by cms_cmssw::wsSetupEnvironment\n',
        'echo ">>> setup environment"\n',
        'echo "set SCRAM ARCH to ' + arch + '"\n',
        'export SCRAM_ARCH=' + arch + '\n',
        'echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n',
        self.wsSetupCMSLCGEnvironment_(),
        'elif [ $middleware == OSG ]; then\n',
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
        self.wsSetupCMSOSGEnvironment_(),
        # SGE environment
        'elif [ $middleware == SGE ]; then\n',
        self.wsSetupCMSLCGEnvironment_(),
        'elif [ $middleware == ARC ]; then\n',
        self.wsSetupCMSLCGEnvironment_(),
        # PBS environment
        'elif [ $middleware == PBS ]; then\n',
        self.wsSetupCMSLCGEnvironment_(),
        'fi\n',
    ]

    # --- JobType-specific part: scram project + runtime ----------------
    scram = self.scram.commandName()
    release = self.version
    pieces += [
        '\n\n',
        'echo ">>> specific cmssw setup environment:"\n',
        'echo "CMSSW_VERSION = ' + release + '"\n',
        scram + ' project CMSSW ' + release + '\n',
        'status=$?\n',
        'if [ $status != 0 ] ; then\n',
        ' echo "ERROR ==> CMSSW ' + release + ' not found on `hostname`" \n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
        'cd ' + release + '\n',
        'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n',
        'if [ $? != 0 ] ; then\n',
        ' echo "ERROR ==> Problem with the command: "\n',
        # the backslash is emitted literally so the shell does not run
        # the back-quoted command while echoing it
        ' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
    ]

    # --- argument count check ------------------------------------------
    pieces += [
        "\n",
        "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n",
        "\n",
        "if [ $nargs -lt " + str(self.argsList) + " ]\n",
        "then\n",
        " echo 'ERROR ==> Too few arguments' +$nargs+ \n",
        ' job_exit_code=50113\n',
        " func_exit\n",
        "fi\n",
        "\n",
    ]

    # --- job-specific part ---------------------------------------------
    job = common.job_list[nj]
    if self.datasetPath:
        pieces += [
            '\n',
            'DatasetPath=' + self.datasetPath + '\n',
            'PrimaryDataset=' + self.primaryDataset + '\n',
            'DataTier=' + self.dataTier + '\n',
            'ApplicationFamily=cmsRun\n',
        ]
    else:
        pieces += [
            'DatasetPath=MCDataTier\n',
            'PrimaryDataset=null\n',
            'DataTier=null\n',
            'ApplicationFamily=MCDataTier\n',
        ]

    if self.pset is not None:
        # copy the per-job configuration into the work area, publish the
        # seed handling lists, then give the config its canonical name
        pset = os.path.basename(job.configFilename())
        pieces += [
            '\n',
            'cp $RUNTIME_AREA/' + pset + ' .\n',
            'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n',
            'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n',
            'echo "PreserveSeeds: <$PreserveSeeds>"\n',
            'echo "IncrementSeeds:<$IncrementSeeds>"\n',
            'mv -f ' + pset + ' ' + psetName + '\n',
        ]
    else:
        # no parameter set: only export the optional script arguments
        pieces.append('\n')
        if self.AdditionalArgs:
            pieces.append('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            pieces.append('export MaxEvents=%s\n' % str(self.NumEvents))

    return ''.join(pieces)
790 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job wrapper script that unpacks the user
    tarball in the work area and adds $RUNTIME_AREA to PYTHONPATH.

    If the file named by self.tgzNameWithPath does not exist when the
    script is generated, only the header comment line is returned.
    'nj' is not used by this method.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
    txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
    # Record the exit status of tar right away.  The debug listing used
    # to be emitted before this assignment, so with the debug wrapper on
    # $? held the status of 'ls' and a failed untar went unnoticed.
    txt += 'untar_status=$? \n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
825 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job wrapper script that moves the unpacked
    software directories (lib/, module/, optionally src/) and the
    additional input files from $RUNTIME_AREA into the current
    directory, adds $RUNTIME_AREA to PYTHONPATH and, when a parameter
    set is configured, computes PSETHASH with edmConfigHash.

    'nj' is not used by this method.
    """
    script = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    if self.dataExist == True:
        script.append('rm -r src/ \n')
        script.append('mv $RUNTIME_AREA/src/ . \n')

    # one 'mv' per additional input file (none emitted for an empty list)
    script.extend(['mv $RUNTIME_AREA/' + os.path.basename(extra) + ' . \n'
                   for extra in self.additional_inbox_files])

    script += [
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ]

    if self.pset is None:
        return ''.join(script)

    psetName = 'pset.py'
    script.append('\n')
    if self.debug_wrapper == 1:
        # dump the configuration and show which edmConfigHash is used
        script += [
            'echo "***** cat ' + psetName + ' *********"\n',
            'cat ' + psetName + '\n',
            'echo "****** end ' + psetName + ' ********"\n',
            '\n',
            'echo "***********************" \n',
            'which edmConfigHash \n',
            'echo "***********************" \n',
        ]
    script += [
        'edmConfigHash ' + psetName + ' \n',
        'PSETHASH=`edmConfigHash ' + psetName + '` \n',
        'echo "PSETHASH = $PSETHASH" \n',
        # temporary fix for noEdm files: fall back to the literal 'null'
        'if [ -z "$PSETHASH" ]; then \n',
        ' export PSETHASH=null\n',
        'fi \n',
        '\n',
    ]
    return ''.join(script)
878 slacapra 1.1
879 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: the shell ("sh ") when a
    user script (self.scriptExe) is configured, self.executable
    otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
885 slacapra 1.1
886     def executableArgs(self):
887 ewv 1.276 if self.scriptExe:
888 ewv 1.319 return self.scriptExe + " $NJob $AdditionalArgs"
889 fanzago 1.115 else:
890 ewv 1.334 return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
891 slacapra 1.1
892     def inputSandbox(self, nj):
893     """
894     Returns a list of filenames to be put in JDL input sandbox.
895     """
896     inp_box = []
897     if os.path.isfile(self.tgzNameWithPath):
898     inp_box.append(self.tgzNameWithPath)
899 spiga 1.320 if os.path.isfile(self.argsFile):
900     inp_box.append(self.argsFile)
901 spiga 1.243 inp_box.append(common.work_space.jobDir() + self.scriptName)
902 slacapra 1.1 return inp_box
903    
904     def outputSandbox(self, nj):
905     """
906     Returns a list of filenames to be put in JDL output sandbox.
907     """
908     out_box = []
909    
910     ## User Declared output files
911 slacapra 1.54 for out in (self.output_file+self.output_file_sandbox):
912 ewv 1.131 n_out = nj + 1
913 slacapra 1.207 out_box.append(numberFile(out,str(n_out)))
914 slacapra 1.1 return out_box
915    
916    
917     def wsRenameOutput(self, nj):
918     """
919     Returns part of a job script which renames the produced files.
920     """
921    
922 ewv 1.160 txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
923 fanzago 1.148 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
924     txt += 'echo ">>> current directory content:"\n'
925 fanzago 1.285 if self.debug_wrapper==1:
926 spiga 1.199 txt += 'ls -Al\n'
927 fanzago 1.145 txt += '\n'
928 slacapra 1.54
929 fanzago 1.128 for fileWithSuffix in (self.output_file):
930 farinafa 1.346 output_file_num = numberFile(fileWithSuffix, '$OutUniqueID')
931 slacapra 1.1 txt += '\n'
932 gutsche 1.7 txt += '# check output file\n'
933 slacapra 1.106 txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
934 ewv 1.147 if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
935     txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
936 spiga 1.209 txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
937 ewv 1.147 else:
938     txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
939     txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
940 slacapra 1.106 txt += 'else\n'
941 fanzago 1.161 txt += ' job_exit_code=60302\n'
942     txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
943 ewv 1.156 if common.scheduler.name().upper() == 'CONDOR_G':
944 gutsche 1.7 txt += ' if [ $middleware == OSG ]; then \n'
945     txt += ' echo "prepare dummy output file"\n'
946     txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
947     txt += ' fi \n'
948 slacapra 1.1 txt += 'fi\n'
949 slacapra 1.105 file_list = []
950     for fileWithSuffix in (self.output_file):
951 farinafa 1.346 file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$OutUniqueID'))
952 ewv 1.131
953 spiga 1.245 txt += 'file_list="'+string.join(file_list,',')+'"\n'
954 fanzago 1.149 txt += '\n'
955 fanzago 1.148 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
956     txt += 'echo ">>> current directory content:"\n'
957 fanzago 1.285 if self.debug_wrapper==1:
958 spiga 1.199 txt += 'ls -Al\n'
959 fanzago 1.148 txt += '\n'
960 gutsche 1.7 txt += 'cd $RUNTIME_AREA\n'
961 fanzago 1.133 txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
962 slacapra 1.1 return txt
963    
def getRequirements(self, nj=[]):
    """
    Return the job requirements expression to add to the jdl files.

    The expression is the ' && ' conjunction of:
      * a Member() test on the CMSSW version tag, if self.version is set;
      * a Member() test on the architecture tag, if
        self.executable_arch is set;
      * the outbound connectivity test (always);
      * for the glite schedulers, the CE state test, which also accepts
        "Special" when GRID.use_cream is set in the configuration.

    'nj' is neither read nor modified here; its default is kept as is
    for interface compatibility.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the clauses and join them once.  Appending ' && <clause>'
    # to a possibly empty string produced an expression starting with a
    # dangling '&&' whenever self.version was empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ["glitecoll", "glite", "glite_slc5"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept so the text matches what was
            # generated before
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
987 gutsche 1.3
988     def configFilename(self):
989     """ return the config filename """
990 ewv 1.334 return self.name()+'.py'
991 gutsche 1.3
992     def wsSetupCMSOSGEnvironment_(self):
993     """
994     Returns part of a job script which is prepares
995     the execution environment and which is common for all CMS jobs.
996     """
997 ewv 1.160 txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
998     txt += ' echo ">>> setup CMS OSG environment:"\n'
999 fanzago 1.133 txt += ' echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
1000     txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
1001 fanzago 1.136 txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1002 ewv 1.135 txt += ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1003 mkirn 1.40 txt += ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1004 fanzago 1.133 txt += ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1005     txt += ' else\n'
1006 fanzago 1.161 txt += ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1007     txt += ' job_exit_code=10020\n'
1008     txt += ' func_exit\n'
1009 fanzago 1.133 txt += ' fi\n'
1010 gutsche 1.3 txt += '\n'
1011 fanzago 1.161 txt += ' echo "==> setup cms environment ok"\n'
1012 fanzago 1.136 txt += ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1013 gutsche 1.3
1014     return txt
1015 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job wrapper script that sets up the CMS
    software environment on LCG-like middlewares.

    The script exports SCRAM_ARCH and BUILD_ARCH
    (self.executable_arch), checks that $VO_CMS_SW_DIR is set and that
    $VO_CMS_SW_DIR/cmsset_default.sh is a non-empty file, then sources
    it.  Failures set job_exit_code to 10031, 10020 or 10032
    respectively and call func_exit.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(lines)
1049 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job wrapper script that modifies the
    FrameworkJobReport through ModifyJobReport.py.

    Nothing is generated (empty string) unless self.copy_data == 1.
    When self.publish_data == 1 the processed dataset name is exported
    to the script and a UserProcessedDataset argument is added to the
    ModifyJobReport.py command line.  'nj' is not used by this method.
    """
    if self.copy_data != 1:
        return ''

    tool = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'

    body = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ] || [ $StageOutExitStatus -eq 60308 ] ; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + tool + '\n',
        'echo "SE = $SE"\n',
        # SE_PATH is taken from the stage-out endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $OutUniqueID for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list'
    if self.publish_data == 1:
        body.append('ProcessedDataset=' + self.processedDataset + '\n')
        body.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args = args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    command = tool + ' ' + str(args)
    body += [
        # show the command, then run it
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(body)
1095 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job wrapper script that parses the
    FrameworkJobReport ($RUNTIME_AREA/crab_fjr_$NJob.xml) with
    parseCrabFjr.py to obtain the executable exit status.

    The generated script:
      * reports to the dashboard (self.debugWrap is appended to that
        command line) and, with self.debug_wrapper == 1, echoes the
        parser output;
      * reads the exit code from the report into
        $executable_exit_status;
      * calls func_exit when the status is non-zero and is not one of
        50115, 50117 or 30001;
      * otherwise records the status as ExeExitCode and job_exit_code.
    """
    fjr = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    parser = '$RUNTIME_AREA/parseCrabFjr.py'

    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s ' + fjr + ' ]; then\n'
    txt += ' if [ -s ' + parser + ' ]; then\n'
    txt += ' cmd_out=`python ' + parser + ' --input ' + fjr + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n'
    if self.debug_wrapper == 1:
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python ' + parser + ' --input ' + fjr + ' --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat ' + fjr + '\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    # the message used to repeat "not available." twice
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    txt += ' if [ $executable_exit_status -eq 0 ];then\n'
    txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
    # NOTE: the verification of the list of processed input files that
    # used to follow here was already disabled (it was kept in a
    # string literal that was never added to the script) because it
    # cannot work with the current job arguments; it has been removed.
    txt += ' fi\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
    txt += ' echo ">>> Executable failed $executable_exit_status"\n'
    txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += ' job_exit_code=$executable_exit_status\n'
    txt += ' func_exit\n'
    txt += 'fi\n\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1168    
def setParam_(self, param, value):
    """
    Store 'value' under the key 'param' in the job-type parameter
    dictionary (self._params), replacing any previous value.
    """
    self._params.update({param: value})
1171    
def getParams(self):
    """
    Return the job-type parameter dictionary itself (not a copy).
    """
    params = self._params
    return params
1174 gutsche 1.8
def outList(self,list=False):
    """
    Return the part of the job wrapper script that exports the list of
    files expected in the output sandbox ('filesToCheck').

    The list holds the numbered names (numberFile(name, '$NJob')) of
    the sandbox output files, preceded by the regular output files
    when self.return_data == 1, followed by the job stdout and stderr
    names.  As a side effect the task record is updated with the
    output file base names.  A message is logged when no output file
    has been defined.

    If 'list' is true, self.output_file is returned instead of the
    script text.
    """
    stdout = 'CMSSW_$NJob.stdout'
    stderr = 'CMSSW_$NJob.stderr'

    if not self.output_file:
        msg = "WARNING: no output files name have been defined!!\n"
        msg += "\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    if self.return_data == 1:
        expected = self.output_file + self.output_file_sandbox
    else:
        expected = self.output_file_sandbox

    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    listOutFiles += [stdout, stderr]
    joined = ' '.join(listOutFiles)

    txt = ''.join([
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: ' + joined + '"\n',
        'filesToCheck="' + joined + '"\n',
        'export filesToCheck\n',
    ])

    # record the output base names in the task database
    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return txt