ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.343
Committed: Sat Nov 14 14:02:44 2009 UTC (15 years, 5 months ago) by mcinquil
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_7_0
Changes since 1.342: +3 -3 lines
Log Message:
typo for pbs

File Contents

# User Rev Content
1 ewv 1.327
2 mcinquil 1.343 __revision__ = "$Id: cms_cmssw.py,v 1.342 2009/10/17 14:58:09 spiga Exp $"
3     __version__ = "$Revision: 1.342 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Set up a CMSSW job type from the crab.cfg parameters.

    Arguments:
      cfg_params  -- mapping of 'SECTION.key' names to their values
      ncjobs      -- number of jobs requested to be created
      skip_blocks -- passed on to DataDiscovery when the dataset is looked up
      isNew       -- if true, rewrite the parameter set and prepare the
                     input sandbox tarball

    Raises CrabException for an unparsable or unsupported CMSSW version,
    a missing or malformed datasetpath, a missing pset or script_exe,
    missing additional input files, or a missing USER.publish_data_name
    when publication is requested.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    self.local = common.scheduler.name().upper() in ['LSF', 'CAF', 'CONDOR', 'SGE', 'PBS']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_' separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # for "nothing usable" rather than for the empty string only.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries, e.g. a trailing comma
                continue
            dirname = ''
            if not pattern.startswith("/"):
                dirname = "."
            if pattern.find("*") > -1:
                matches = glob.glob(os.path.join(dirname, pattern))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: " + pattern)
            else:
                matches = [pattern]
            for match in matches:
                if not os.path.exists(match):
                    raise CrabException("Additional input file not found: " + match)
                self.additional_inbox_files.append(match.strip())
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string: keep the stripped value.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        self.preserveSeeds = [seed.strip() for seed in cfg_params['CMSSW.preserve_seeds'].split(',')]
    if 'CMSSW.increment_seeds' in cfg_params:
        self.incrementSeeds = [seed.strip() for seed in cfg_params['CMSSW.increment_seeds'].split(',')]

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    ### FEDE ###
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if 'USER.publish_data_name' not in cfg_params:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = cfg_params['USER.publish_data_name']
        # NOTE: a length check on the publication name used to follow here;
        # it was already disabled (wrapped in a string literal) and never ran.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation', 0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites'] = blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators'] = self.managedGenerators
                self.conf['generator'] = self.generator
        elif self.ads:
            self.algo = 'LumiBased'
        elif splitByRun == 1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params, self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
304 spiga 1.293
305    
306     def ModifyPset(self):
307     import PsetManipulator as pp
308 ewv 1.335
309     # If pycfg_params set, fake out the config script
310     # to make it think it was called with those args
311     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
312     if pycfg_params:
313     trueArgv = sys.argv
314     sys.argv = [self.pset]
315     sys.argv.extend(pycfg_params.split(' '))
316 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
317 ewv 1.335 if pycfg_params: # Restore original sys.argv
318     sys.argv = trueArgv
319    
320 spiga 1.293 try:
321     # Add FrameworkJobReport to parameter-set, set max events.
322     # Reset later for data jobs by writeCFG which does all modifications
323 ewv 1.295 PsetEdit.maxEvent(1)
324 spiga 1.293 PsetEdit.skipEvent(0)
325     PsetEdit.psetWriter(self.configFilename())
326     ## If present, add TFileService to output files
327     if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
328     tfsOutput = PsetEdit.getTFileService()
329     if tfsOutput:
330     if tfsOutput in self.output_file:
331 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
332 spiga 1.293 else:
333     outfileflag = True #output found
334     self.output_file.append(tfsOutput)
335 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
336 spiga 1.293 pass
337     pass
338 ewv 1.321 # If present and requested, add PoolOutputModule to output files
339 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
340 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
341     if edmOutput:
342 ewv 1.321 for outputFile in edmOutput:
343     if outputFile in self.output_file:
344 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
345 ewv 1.321 else:
346     self.output_file.append(outputFile)
347     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
348     # not requested, check anyhow to avoid accidental T2 overload
349 slacapra 1.297 else:
350 ewv 1.321 if edmOutput:
351     missedFiles = []
352     for outputFile in edmOutput:
353     if outputFile not in self.output_file:
354     missedFiles.append(outputFile)
355     if missedFiles:
356     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
357     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
358     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
359     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
360     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
361     common.logger.info(msg)
362 spiga 1.322 else :
363 ewv 1.321 raise CrabException(msg)
364 ewv 1.301
365     if (PsetEdit.getBadFilesSetting()):
366     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
367 spiga 1.304 common.logger.info(msg)
368 ewv 1.301
369 slacapra 1.297 except CrabException, msg:
370 spiga 1.304 common.logger.info(str(msg))
371 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
372 spiga 1.293 raise CrabException(msg)
373    
374 gutsche 1.3
375 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
376    
377 slacapra 1.86 import DataDiscovery
378     import DataLocation
379 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
380 gutsche 1.3
381     datasetPath=self.datasetPath
382    
383 slacapra 1.1 ## Contact the DBS
384 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
385 slacapra 1.1 try:
386 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
387 slacapra 1.1 self.pubdata.fetchDBSInfo()
388    
389 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
390 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
391     raise CrabException(msg)
392 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
393 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
394     raise CrabException(msg)
395 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
396 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
397 slacapra 1.1 raise CrabException(msg)
398    
399 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
400 slacapra 1.270 #print self.filesbyblock
401 spiga 1.269 self.conf['pubdata']=self.pubdata
402 gutsche 1.3
403 slacapra 1.1 ## get max number of events
404 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
405 slacapra 1.1
406     ## Contact the DLS and build a list of sites hosting the fileblocks
407     try:
408 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
409 gutsche 1.6 dataloc.fetchDLSInfo()
410 slacapra 1.263
411 slacapra 1.41 except DataLocation.DataLocationError , ex:
412 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
413     raise CrabException(msg)
414 ewv 1.131
415 slacapra 1.1
416 slacapra 1.270 unsorted_sites = dataloc.getSites()
417     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
418     for lfn in self.filesbyblock.keys():
419     if unsorted_sites.has_key(lfn):
420     sites[lfn]=unsorted_sites[lfn]
421     else:
422     sites[lfn]=[]
423    
424 slacapra 1.264 if len(sites)==0:
425 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
426     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
427     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
428 slacapra 1.264 raise CrabException(msg)
429    
430 gutsche 1.35 allSites = []
431     listSites = sites.values()
432 slacapra 1.63 for listSite in listSites:
433     for oneSite in listSite:
434 gutsche 1.35 allSites.append(oneSite)
435 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
436 ewv 1.295
437 gutsche 1.3
438 gutsche 1.92 # screen output
439 spiga 1.304 common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
440 gutsche 1.92
441 gutsche 1.35 return sites
442 ewv 1.131
443 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Register the jobs computed by the splitter.

    The jobParams argument is ignored: the per-job arguments are always
    taken from self.dict['args']. Job numbers start at firstJobID + 1.

    For each job the arguments are collected for the arguments XML file
    (self.argsFile, created on first use) and the job id and destination
    are stored through common._db.updateJob_.

    Raises CrabException if there are no jobs, or more than 500 jobs
    without server or local-scheduler mode.
    """
    # Imported once here rather than on every loop iteration.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap

    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    for i in range(njobs):
        jobParams.append("")

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    cms_se = CmsSEMap()
    for index in range(njobs):
        job = index + int(firstJobID)
        jobNumber = job + 1
        listID.append(jobNumber)

        params = jobParams[index]
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for i in range(len(params)):
                argu[self.dict['params'][i]] = params[i]
                if len(params) == 1:
                    self.NumEvents = params[i]
            listDictions.append(argu)
            # just for debug: keep the job number apart from its arguments
            str_argu += ' ' + ' '.join(params)

        job_ToSave = {}
        job_ToSave['arguments'] = str(jobNumber)
        job_ToSave['dlsDestination'] = self.jobDestination[index]
        listField.append(job_ToSave)

        msg = "Job %s Arguments: %s\n"%(str(jobNumber), str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)

    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
495 ewv 1.313
496 spiga 1.320 # def zipXMLfile(self):
497 ewv 1.313
498 spiga 1.320 # import tarfile
499     # try:
500     # tar = tarfile.open(self.tarNameWithPath, "a")
501     # tar.add(self.argsFile, os.path.basename(self.argsFile))
502     # tar.close()
503     # except IOError, exc:
504     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
505     # msg += str(exc)
506     # raise CrabException(msg)
507     # except tarfile.TarError, exc:
508     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
509     # msg += str(exc)
510     # raise CrabException(msg)
511 ewv 1.325
def CreateXML(self):
    """
    _CreateXML_

    Write a new arguments file (self.argsFile) holding only an empty
    root node named self.rootArgsFilename. An existing file is
    overwritten.
    """
    result = IMProvNode(self.rootArgsFilename)
    # close the handle explicitly instead of relying on garbage collection
    handle = open(self.argsFile, 'w')
    try:
        handle.write(str(result))
    finally:
        handle.close()
    return
518 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add an entry to the xml file

    listDictions is a list of dictionaries; each one becomes the
    attributes of a new 'Job' node appended to the document stored in
    self.argsFile, which is then written back in place.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    # close the handle explicitly instead of relying on garbage collection
    handle = open(self.argsFile, 'w')
    try:
        handle.write(str(improvDoc))
    finally:
        handle.close()
    return
534 ewv 1.131
def numberOfJobs(self):
    """
    Return the number of jobs computed by the splitter, or None when
    WMBS automation is enabled (self.automation != 0).
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
541    
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball.

    The path is recomputed and stored in self.tgzNameWithPath. If no
    file exists there yet, buildTar_ is asked to create it.
    """
    tarballPath = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()

    return tarballPath
554 slacapra 1.1
555     def buildTar_(self, executable):
556    
557     # First of all declare the user Scram area
558     swArea = self.scram.getSWArea_()
559     swReleaseTop = self.scram.getReleaseTop_()
560 ewv 1.131
561 slacapra 1.1 ## check if working area is release top
562     if swReleaseTop == '' or swArea == swReleaseTop:
563 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
564 slacapra 1.1 return
565    
566 slacapra 1.61 import tarfile
567     try: # create tar ball
568 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
569 slacapra 1.61 ## First find the executable
570 slacapra 1.86 if (self.executable != ''):
571 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
572     if ( not exeWithPath ):
573     raise CrabException('User executable '+executable+' not found')
574 ewv 1.131
575 slacapra 1.61 ## then check if it's private or not
576     if exeWithPath.find(swReleaseTop) == -1:
577     # the exe is private, so we must ship
578 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
579 slacapra 1.61 path = swArea+'/'
580 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
581     if exeWithPath.find(path) >= 0 :
582     exe = string.replace(exeWithPath, path,'')
583 slacapra 1.129 tar.add(path+exe,exe)
584 corvo 1.85 else :
585     tar.add(exeWithPath,os.path.basename(executable))
586 slacapra 1.61 pass
587     else:
588     # the exe is from release, we'll find it on WN
589     pass
590 ewv 1.131
591 slacapra 1.61 ## Now get the libraries: only those in local working area
592 slacapra 1.256 tar.dereference=True
593 slacapra 1.61 libDir = 'lib'
594     lib = swArea+'/' +libDir
595 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
596 slacapra 1.61 if os.path.exists(lib):
597     tar.add(lib,libDir)
598 ewv 1.131
599 slacapra 1.61 ## Now check if module dir is present
600     moduleDir = 'module'
601     module = swArea + '/' + moduleDir
602     if os.path.isdir(module):
603     tar.add(module,moduleDir)
604 slacapra 1.256 tar.dereference=False
605 slacapra 1.61
606     ## Now check if any data dir(s) is present
607 spiga 1.179 self.dataExist = False
608 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
609 slacapra 1.206 while len(todo_list):
610     entry, name = todo_list.pop()
611 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
612 slacapra 1.206 continue
613 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
614 slacapra 1.206 entryPath = entry + '/'
615 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
616 slacapra 1.206 if name == 'data':
617     self.dataExist=True
618 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
619 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
620 slacapra 1.206 pass
621     pass
622 ewv 1.182
623 spiga 1.179 ### CMSSW ParameterSet
624     if not self.pset is None:
625     cfg_file = common.work_space.jobDir()+self.configFilename()
626 ewv 1.182 tar.add(cfg_file,self.configFilename())
627 ewv 1.313
628 spiga 1.309 try:
629     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
630     tar.add(crab_cfg_file,'crab.cfg')
631     except:
632     pass
633 fanzago 1.93
634 fanzago 1.152 ## Add ProdCommon dir to tar
635 slacapra 1.211 prodcommonDir = './'
636     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
637 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
638 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
639     'WMCore/__init__.py','WMCore/Algorithms']
640 slacapra 1.214 for file in neededStuff:
641     tar.add(prodcommonPath+file,prodcommonDir+file)
642 spiga 1.179
643     ##### ML stuff
644     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
645     path=os.environ['CRABDIR'] + '/python/'
646     for file in ML_file_list:
647     tar.add(path+file,file)
648    
649     ##### Utils
650 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
651 spiga 1.179 for file in Utils_file_list:
652     tar.add(path+file,file)
653 ewv 1.131
654 ewv 1.182 ##### AdditionalFiles
655 slacapra 1.253 tar.dereference=True
656 spiga 1.179 for file in self.additional_inbox_files:
657     tar.add(file,string.split(file,'/')[-1])
658 slacapra 1.253 tar.dereference=False
659 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
660 ewv 1.182
661 slacapra 1.61 tar.close()
662 mcinquil 1.241 except IOError, exc:
663 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
664 spiga 1.304 msg += str(exc)
665     raise CrabException(msg)
666 mcinquil 1.241 except tarfile.TarError, exc:
667 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
668 spiga 1.304 msg += str(exc)
669     raise CrabException(msg)
670 spiga 1.300
671 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
672     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
673 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
674 ewv 1.250 +'MB input sandbox limit \n'
675 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
676     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
677 mcinquil 1.336 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServerForUsers#Server_available_for_users'
678 spiga 1.238 raise CrabException(msg)
679 gutsche 1.72
680 slacapra 1.61 ## create tar-ball with ML stuff
681 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The generated shell text selects the CMS software setup according to
    $middleware, creates the CMSSW project area with scram, checks the
    number of arguments and defines the dataset and pset related shell
    variables.

    nj -- index into common.job_list.  The entry is looked up only when a
          pset is configured, because only its config file name is used.
    """
    psetName = 'pset.py'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    # SGE, ARC and PBS all reuse the LCG environment setup
    for middleware in ('SGE', 'ARC', 'PBS'):
        txt += 'elif [ $middleware == ' + middleware + ' ]; then\n'
        txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    # the backslashes keep the shell from running the quoted command again
    txt += ' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        txt += 'PrimaryDataset='+self.primaryDataset +'\n'
        txt += 'DataTier='+self.dataTier+'\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        # the job object is needed only for the name of its config file
        job = common.job_list[nj]
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
    else:
        # no pset: only export the optional extra arguments / event limit
        txt += '\n'
        if self.AdditionalArgs:
            txt += 'export AdditionalArgs=\"%s\"\n'%(self.AdditionalArgs)
        if int(self.NumEvents) != 0:
            txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
    return txt
788 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job script that unpacks the input tar-ball
    found in $RUNTIME_AREA and prepends $RUNTIME_AREA to PYTHONPATH.

    Only the header comment is returned when self.tgzNameWithPath is not
    an existing file.  'nj' is not used.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n'
        txt += 'tar zxvf $RUNTIME_AREA/'+tarball+'\n'
        # Record the exit status of tar immediately: the optional debug
        # listing below would otherwise overwrite $? with the status of ls.
        txt += 'untar_status=$? \n'
        if self.debug_wrapper == 1:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += 'fi\n'
        txt += '\n'

    return txt
823 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job script that moves the unpacked user
    software (lib/, module/, optionally src/ and the additional input
    files) from $RUNTIME_AREA into the current directory, prepends
    $RUNTIME_AREA to PYTHONPATH and, when a pset is configured, computes
    PSETHASH with edmConfigHash.

    'nj' is not used.
    """

    txt = '\n#Written by cms_cmssw::wsBuildExe\n'
    txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'

    txt += 'rm -r lib/ module/ \n'
    txt += 'mv $RUNTIME_AREA/lib/ . \n'
    txt += 'mv $RUNTIME_AREA/module/ . \n'
    # NOTE(review): explicit comparison kept as in the original; the type
    # of dataExist is not visible here -- confirm before simplifying.
    if self.dataExist == True:
        txt += 'rm -r src/ \n'
        txt += 'mv $RUNTIME_AREA/src/ . \n'
    for inbox_file in self.additional_inbox_files:
        txt += 'mv $RUNTIME_AREA/'+os.path.basename(inbox_file)+' . \n'

    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    if self.pset is not None:
        psetName = 'pset.py'

        txt += '\n'
        if self.debug_wrapper == 1:
            # debug only: dump the pset and show which edmConfigHash is used
            txt += 'echo "***** cat ' + psetName + ' *********"\n'
            txt += 'cat ' + psetName + '\n'
            txt += 'echo "****** end ' + psetName + ' ********"\n'
            txt += '\n'
            txt += 'echo "***********************" \n'
            txt += 'which edmConfigHash \n'
            txt += 'echo "***********************" \n'
        txt += 'edmConfigHash ' + psetName + ' \n'
        txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        #### FEDE temporary fix for noEdm files #####
        txt += 'if [ -z "$PSETHASH" ]; then \n'
        txt += ' export PSETHASH=null\n'
        txt += 'fi \n'
        #############################################
        txt += '\n'
    return txt
876 slacapra 1.1
877 ewv 1.131
def executableName(self):
    """
    Return the command that starts the job: "sh " when a user script
    (self.scriptExe) is configured, otherwise self.executable.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
883 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string for the command given by executableName():
    the user script followed by the job number and the additional
    arguments, or the framework job report and pset options otherwise.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
889 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox:
    the tar-ball and the arguments file (each only if it exists on disk),
    followed by the job script in the job directory.  'nj' is not used.
    """
    candidates = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in candidates if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
901    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (self.output_file plus
    self.output_file_sandbox) is passed through numberFile with the
    number nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
913    
914    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file the script renames it to its
    numbered name (numberFile with '$NJob') and links it back under the
    original name in $RUNTIME_AREA; a missing file sets job_exit_code to
    60302.  The script also defines 'file_list' and finally changes to
    $RUNTIME_AREA.  'nj' is not used.
    """
    # the same directory report is written before and after the renaming
    dir_report = 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    dir_report += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        dir_report += 'ls -Al\n'
    dir_report += '\n'

    chunks = ['\n#Written by cms_cmssw::wsRenameOutput\n', dir_report]

    for produced in self.output_file:
        numbered = numberFile(produced, '$NJob')
        chunks.append('\n')
        chunks.append('# check output file\n')
        chunks.append('if [ -e ./'+produced+' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            chunks.append(' mv '+produced+' '+numbered+'\n')
            chunks.append(' ln -s `pwd`/'+numbered+' $RUNTIME_AREA/'+produced+'\n')
        else:
            chunks.append(' mv '+produced+' $RUNTIME_AREA/'+numbered+'\n')
            chunks.append(' ln -s $RUNTIME_AREA/'+numbered+' $RUNTIME_AREA/'+produced+'\n')
        chunks.append('else\n')
        chunks.append(' job_exit_code=60302\n')
        chunks.append(' echo "WARNING: Output file '+produced+' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            chunks.append(' if [ $middleware == OSG ]; then \n')
            chunks.append(' echo "prepare dummy output file"\n')
            chunks.append(' echo "Processing of job output failed" > $RUNTIME_AREA/'+numbered+'\n')
            chunks.append(' fi \n')
        chunks.append('fi\n')

    numbered_paths = [numberFile('$SOFTWARE_DIR/'+produced, '$NJob')
                      for produced in self.output_file]

    chunks.append('file_list="'+','.join(numbered_paths)+'"\n')
    chunks.append('\n')
    chunks.append(dir_report)
    chunks.append('cd $RUNTIME_AREA\n')
    chunks.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(chunks)
961    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The expression is the ' && ' conjunction of: the software tag for
    self.version, the software tag for self.executable_arch (each only
    when set), the outbound connectivity attribute and, for the glite /
    glitecoll schedulers, the CE state clause (which also accepts
    "Special" when GRID.use_cream is set).

    Building the clauses as a list keeps the expression from starting
    with a dangling ' && ' when self.version is empty.  'nj' is not used.
    """
    runtime_env = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + runtime_env)
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + runtime_env)

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() in ("glitecoll", "glite"):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept so the text matches what was emitted before
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
985 gutsche 1.3
def configFilename(self):
    """ return the config filename: the value of self.name() plus '.py' """
    base = self.name()
    return base + '.py'
989 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on OSG and which is common for all CMS jobs: it exports
    SCRAM_ARCH and sources $OSG_APP/cmssoft/cms/cmsset_default.sh for
    self.version, failing with exit code 10020 when that file is missing.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(lines)
1013 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on LCG-like sites and which is common for all CMS jobs:
    it exports SCRAM_ARCH and BUILD_ARCH and sources
    $VO_CMS_SW_DIR/cmsset_default.sh, setting exit code 10031, 10020 or
    10032 when the directory, the file or the sourcing fails.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH='+arch+'\n',
        ' export BUILD_ARCH='+arch+'\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(lines)
1047 gutsche 1.5
def wsModifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    Returns an empty string unless self.copy_data is 1.  Otherwise the
    script runs ModifyJobReport.py on crab_fjr_$NJob.xml and, when it
    succeeds, replaces the report with NewFrameworkJobReport.xml.  With
    self.publish_data equal to 1 the processed dataset is passed as well.
    'nj' is not used.
    """
    if self.copy_data != 1:
        return ''

    modify_cmd = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '
    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list'

    pieces = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n',
        'echo "SE = $SE"\n',
        #### FEDE changing SE_PATH with the endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        pieces.append('ProcessedDataset='+self.processedDataset+'\n')
        pieces.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args = args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    pieces.extend([
        'echo "' + modify_cmd + args + '"\n',
        modify_cmd + args + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(pieces)
1093 ewv 1.283
def wsParseFJR(self):
    """
    Parse the FrameworkJobReport to obtain useful infos

    Returns the part of the job script that runs parseCrabFjr.py on
    crab_fjr_$NJob.xml (when both exist and are not empty), takes the
    executable exit status from it, and stops the job through func_exit
    when that status is not one of 0, 50115, 50117 or 30001.  The exit
    status is finally written to $RUNTIME_AREA/$repo.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
    if self.debug_wrapper == 1:
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    # message fixed: it used to repeat "not available." twice
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
    txt += ' if [ $executable_exit_status -eq 0 ];then\n'
    txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
    # The verification of the processed input files (input file list
    # compared with the LFNs of the job report, exit status 30001 on
    # mismatch) was disabled here: it cannot work any more given the
    # changes on the job arguments.
    txt += ' fi\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
    txt += ' echo ">>> Executable failed $executable_exit_status"\n'
    txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += ' job_exit_code=$executable_exit_status\n'
    txt += ' func_exit\n'
    txt += 'fi\n\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1166    
def setParam_(self, param, value):
    """ Store 'value' under the key 'param' in self._params. """
    params = self._params
    params[param] = value
1169    
def getParams(self):
    """ Return the parameter dictionary self._params itself (not a copy). """
    params = self._params
    return params
1172 gutsche 1.8
def outList(self,list=False):
    """
    Build the script part that declares the files expected in the output
    sandbox and record the output file names in the task database.

    The numbered names (numberFile with '$NJob') of the sandbox files,
    plus those of self.output_file when self.return_data is 1, followed
    by the job stdout and stderr, are exported as 'filesToCheck'.
    self.output_file is stored as 'outfileBasename' through
    common._db.updateTask_ on every call.

    list -- when true, return self.output_file instead of the script text
    """
    if len(self.output_file) == 0:
        warning = "WARNING: no output files name have been defined!!\n"
        warning += "\tno output files will be reported back/staged\n"
        common.logger.info(warning)

    if self.return_data == 1:
        expected = self.output_file + self.output_file_sandbox
    else:
        expected = self.output_file_sandbox

    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    listOutFiles.append('CMSSW_$NJob.stdout')
    listOutFiles.append('CMSSW_$NJob.stderr')
    joined = ' '.join(listOutFiles)

    txt = 'echo ">>> list of expected files on output sandbox"\n'
    txt += 'echo "output files: '+joined+'"\n'
    txt += 'filesToCheck="'+joined+'"\n'
    txt += 'export filesToCheck\n'

    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return txt