ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.335
Committed: Thu Aug 27 12:58:30 2009 UTC (15 years, 8 months ago) by ewv
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_7_0_pre1
Changes since 1.334: +13 -2 lines
Log Message:
Code and documentation for passing parameters to pset.py

File Contents

# User Rev Content
1 ewv 1.327
2 ewv 1.335 __revision__ = "$Id: cms_cmssw.py,v 1.334 2009/08/27 12:42:23 ewv Exp $"
3     __version__ = "$Revision: 1.334 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure a CMSSW job type from the user's crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' names to values
    ncjobs      -- number of jobs requested to be created (stored as
                   self.ncjobs)
    skip_blocks -- stored and later handed to DataDiscovery
    isNew       -- when true the ParameterSet is modified (if one is
                   given) and the input sandbox tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is
    older than 2_1, when datasetpath is empty, when the pset, script_exe
    or an additional input file is missing, or when publication is
    requested without USER.publish_data_name.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  self.cfg_params.get('CRAB.use_server',0)
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp =='':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower()=='none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap=''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1: self.debugWrap='--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs : self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # A missing USER.script_exe yields None (not ''), so test for any
    # false value; comparing against '' alone never caught that case.
    if self.datasetPath == None and self.pset == None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for pattern in tmpAddFiles:
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries (e.g. a trailing comma) instead
                # of failing with IndexError on pattern[0]
                continue
            dirname = ''
            if not pattern[0]=="/": dirname = "."
            files = []
            if pattern.find("*")>-1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files.append(pattern)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # strip() returns a new string; keep the stripped value
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for seed in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seed.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for seed in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seed.strip())

    self.firstRun = cfg_params.get('CMSSW.first_run',None)

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if (self.publish_data == 1):
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']
            # NOTE: a check on the length of the publication name used
            # to follow here; it had been disabled by wrapping it in a
            # string literal and is removed as dead code.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end
    self.conf['blockSites']=blockSites

    ## Select Splitting
    splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))

    if self.selectNoInput:
        if self.pset == None:
            self.algo = 'ForScript'
        else:
            self.algo = 'NoInput'
            self.conf['managedGenerators']=self.managedGenerators
            self.conf['generator']=self.generator
    elif self.ads:
        self.algo = 'LumiBased'
    elif splitByRun ==1:
        self.algo = 'RunBased'
    else:
        self.algo = 'EventBased'
    common.logger.debug("Job splitting method: %s" % self.algo)

    splitter = JobSplitter(self.cfg_params,self.conf)
    self.dict = splitter.Algos()[self.algo]()

    self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename= 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset != None: self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
298 spiga 1.293
299    
300     def ModifyPset(self):
301     import PsetManipulator as pp
302 ewv 1.335
303     # If pycfg_params set, fake out the config script
304     # to make it think it was called with those args
305     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
306     if pycfg_params:
307     trueArgv = sys.argv
308     sys.argv = [self.pset]
309     sys.argv.extend(pycfg_params.split(' '))
310 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
311 ewv 1.335 if pycfg_params: # Restore original sys.argv
312     sys.argv = trueArgv
313    
314 spiga 1.293 try:
315     # Add FrameworkJobReport to parameter-set, set max events.
316     # Reset later for data jobs by writeCFG which does all modifications
317 ewv 1.295 PsetEdit.maxEvent(1)
318 spiga 1.293 PsetEdit.skipEvent(0)
319     PsetEdit.psetWriter(self.configFilename())
320     ## If present, add TFileService to output files
321     if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
322     tfsOutput = PsetEdit.getTFileService()
323     if tfsOutput:
324     if tfsOutput in self.output_file:
325 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
326 spiga 1.293 else:
327     outfileflag = True #output found
328     self.output_file.append(tfsOutput)
329 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
330 spiga 1.293 pass
331     pass
332 ewv 1.321 # If present and requested, add PoolOutputModule to output files
333 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
334 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
335     if edmOutput:
336 ewv 1.321 for outputFile in edmOutput:
337     if outputFile in self.output_file:
338 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
339 ewv 1.321 else:
340     self.output_file.append(outputFile)
341     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
342     # not requested, check anyhow to avoid accidental T2 overload
343 slacapra 1.297 else:
344 ewv 1.321 if edmOutput:
345     missedFiles = []
346     for outputFile in edmOutput:
347     if outputFile not in self.output_file:
348     missedFiles.append(outputFile)
349     if missedFiles:
350     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
351     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
352     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
353     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
354     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
355     common.logger.info(msg)
356 spiga 1.322 else :
357 ewv 1.321 raise CrabException(msg)
358 ewv 1.301
359     if (PsetEdit.getBadFilesSetting()):
360     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
361 spiga 1.304 common.logger.info(msg)
362 ewv 1.301
363 slacapra 1.297 except CrabException, msg:
364 spiga 1.304 common.logger.info(str(msg))
365 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
366 spiga 1.293 raise CrabException(msg)
367    
368 gutsche 1.3
369 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
370    
371 slacapra 1.86 import DataDiscovery
372     import DataLocation
373 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
374 gutsche 1.3
375     datasetPath=self.datasetPath
376    
377 slacapra 1.1 ## Contact the DBS
378 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
379 slacapra 1.1 try:
380 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
381 slacapra 1.1 self.pubdata.fetchDBSInfo()
382    
383 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
384 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
385     raise CrabException(msg)
386 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
387 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
388     raise CrabException(msg)
389 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
390 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
391 slacapra 1.1 raise CrabException(msg)
392    
393 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
394 slacapra 1.270 #print self.filesbyblock
395 spiga 1.269 self.conf['pubdata']=self.pubdata
396 gutsche 1.3
397 slacapra 1.1 ## get max number of events
398 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
399 slacapra 1.1
400     ## Contact the DLS and build a list of sites hosting the fileblocks
401     try:
402 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
403 gutsche 1.6 dataloc.fetchDLSInfo()
404 slacapra 1.263
405 slacapra 1.41 except DataLocation.DataLocationError , ex:
406 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
407     raise CrabException(msg)
408 ewv 1.131
409 slacapra 1.1
410 slacapra 1.270 unsorted_sites = dataloc.getSites()
411     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
412     for lfn in self.filesbyblock.keys():
413     if unsorted_sites.has_key(lfn):
414     sites[lfn]=unsorted_sites[lfn]
415     else:
416     sites[lfn]=[]
417    
418 slacapra 1.264 if len(sites)==0:
419 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
420     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
421     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
422 slacapra 1.264 raise CrabException(msg)
423    
424 gutsche 1.35 allSites = []
425     listSites = sites.values()
426 slacapra 1.63 for listSite in listSites:
427     for oneSite in listSite:
428 gutsche 1.35 allSites.append(oneSite)
429 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
430 ewv 1.295
431 gutsche 1.3
432 gutsche 1.92 # screen output
433 spiga 1.304 common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
434 gutsche 1.92
435 gutsche 1.35 return sites
436 ewv 1.131
437 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Record the arguments and destinations of each job produced by the
    splitter.

    jobParams  -- ignored on input: the name is rebound to
                  self.dict['args'] (kept for interface compatibility)
    firstJobID -- offset added to the 0-based job index; job numbers
                  are index + firstJobID + 1

    For every job the per-job argument dictionary is collected for the
    arguments XML file and the job number / destination are stored via
    common._db.updateJob_().

    Raises CrabException if there are zero jobs, or more than 500 jobs
    when neither server mode nor a local scheduler is used.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    for i in range(njobs):
        jobParams.append("")

    # One mapper is enough for all jobs; import and build it once
    # instead of once per loop iteration.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    firstID = int(firstJobID)
    paramNames = self.dict['params']
    concString = ' '

    listID=[]
    listField=[]
    listDictions=[]
    exist= os.path.exists(self.argsFile)
    for index in range(njobs):
        jobNumber = index + firstID + 1
        listID.append(jobNumber)
        params = jobParams[index]
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for i in range(len(params)):
                argu[paramNames[i]]=params[i]
            if len(params)==1: self.NumEvents = params[0]
            # just for debug; separate the job number from the arguments
            str_argu += concString + concString.join(params)
            listDictions.append(argu)
        job_ToSave = {}
        job_ToSave['arguments']= str(jobNumber)
        job_ToSave['dlsDestination']= self.jobDestination[index]
        listField.append(job_ToSave)
        msg="Job %s Arguments: %s\n"%(str(jobNumber),str_argu)
        msg+="\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg+="\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)
    # write xml
    if len(listDictions):
        if not exist: self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
489 ewv 1.313
490 spiga 1.320 # def zipXMLfile(self):
491 ewv 1.313
492 spiga 1.320 # import tarfile
493     # try:
494     # tar = tarfile.open(self.tarNameWithPath, "a")
495     # tar.add(self.argsFile, os.path.basename(self.argsFile))
496     # tar.close()
497     # except IOError, exc:
498     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
499     # msg += str(exc)
500     # raise CrabException(msg)
501     # except tarfile.TarError, exc:
502     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
503     # msg += str(exc)
504     # raise CrabException(msg)
505 ewv 1.325
def CreateXML(self):
    """
    Write a new arguments file at self.argsFile containing only the
    root node named self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed to disk before the
        # file is read back
        outfile.close()
    return
512 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add an entry to the xml file

    listDictions -- list of dictionaries; each one becomes the
                    attributes of a 'Job' node appended to the document
                    loaded from self.argsFile, which is then rewritten.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname= 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # do not leave the handle open (and the data unflushed)
        outfile.close()
    return
528 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitter result held in self.dict.
    """
    splitResult = self.dict
    return splitResult['njobs']
531 gutsche 1.3
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe.

    The path is rebuilt from the work space tgz directory and
    self.tgz_name; the tarball is only built (via buildTar_) when no
    file exists at that path yet.
    """
    tarballPath = common.work_space.pathForTgz()+self.tgz_name
    self.tgzNameWithPath = tarballPath
    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)
    return tarballPath
544 slacapra 1.1
545     def buildTar_(self, executable):
546    
547     # First of all declare the user Scram area
548     swArea = self.scram.getSWArea_()
549     swReleaseTop = self.scram.getReleaseTop_()
550 ewv 1.131
551 slacapra 1.1 ## check if working area is release top
552     if swReleaseTop == '' or swArea == swReleaseTop:
553 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
554 slacapra 1.1 return
555    
556 slacapra 1.61 import tarfile
557     try: # create tar ball
558 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
559 slacapra 1.61 ## First find the executable
560 slacapra 1.86 if (self.executable != ''):
561 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
562     if ( not exeWithPath ):
563     raise CrabException('User executable '+executable+' not found')
564 ewv 1.131
565 slacapra 1.61 ## then check if it's private or not
566     if exeWithPath.find(swReleaseTop) == -1:
567     # the exe is private, so we must ship
568 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
569 slacapra 1.61 path = swArea+'/'
570 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
571     if exeWithPath.find(path) >= 0 :
572     exe = string.replace(exeWithPath, path,'')
573 slacapra 1.129 tar.add(path+exe,exe)
574 corvo 1.85 else :
575     tar.add(exeWithPath,os.path.basename(executable))
576 slacapra 1.61 pass
577     else:
578     # the exe is from release, we'll find it on WN
579     pass
580 ewv 1.131
581 slacapra 1.61 ## Now get the libraries: only those in local working area
582 slacapra 1.256 tar.dereference=True
583 slacapra 1.61 libDir = 'lib'
584     lib = swArea+'/' +libDir
585 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
586 slacapra 1.61 if os.path.exists(lib):
587     tar.add(lib,libDir)
588 ewv 1.131
589 slacapra 1.61 ## Now check if module dir is present
590     moduleDir = 'module'
591     module = swArea + '/' + moduleDir
592     if os.path.isdir(module):
593     tar.add(module,moduleDir)
594 slacapra 1.256 tar.dereference=False
595 slacapra 1.61
596     ## Now check if any data dir(s) is present
597 spiga 1.179 self.dataExist = False
598 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
599 slacapra 1.206 while len(todo_list):
600     entry, name = todo_list.pop()
601 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
602 slacapra 1.206 continue
603 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
604 slacapra 1.206 entryPath = entry + '/'
605 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
606 slacapra 1.206 if name == 'data':
607     self.dataExist=True
608 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
609 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
610 slacapra 1.206 pass
611     pass
612 ewv 1.182
613 spiga 1.179 ### CMSSW ParameterSet
614     if not self.pset is None:
615     cfg_file = common.work_space.jobDir()+self.configFilename()
616 ewv 1.182 tar.add(cfg_file,self.configFilename())
617 ewv 1.313
618 spiga 1.309 try:
619     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
620     tar.add(crab_cfg_file,'crab.cfg')
621     except:
622     pass
623 fanzago 1.93
624 fanzago 1.152 ## Add ProdCommon dir to tar
625 slacapra 1.211 prodcommonDir = './'
626     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
627 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
628 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
629     'WMCore/__init__.py','WMCore/Algorithms']
630 slacapra 1.214 for file in neededStuff:
631     tar.add(prodcommonPath+file,prodcommonDir+file)
632 spiga 1.179
633     ##### ML stuff
634     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
635     path=os.environ['CRABDIR'] + '/python/'
636     for file in ML_file_list:
637     tar.add(path+file,file)
638    
639     ##### Utils
640 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
641 spiga 1.179 for file in Utils_file_list:
642     tar.add(path+file,file)
643 ewv 1.131
644 ewv 1.182 ##### AdditionalFiles
645 slacapra 1.253 tar.dereference=True
646 spiga 1.179 for file in self.additional_inbox_files:
647     tar.add(file,string.split(file,'/')[-1])
648 slacapra 1.253 tar.dereference=False
649 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
650 ewv 1.182
651 slacapra 1.61 tar.close()
652 mcinquil 1.241 except IOError, exc:
653 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
654 spiga 1.304 msg += str(exc)
655     raise CrabException(msg)
656 mcinquil 1.241 except tarfile.TarError, exc:
657 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
658 spiga 1.304 msg += str(exc)
659     raise CrabException(msg)
660 spiga 1.300
661 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
662     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
663 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
664 ewv 1.250 +'MB input sandbox limit \n'
665 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
666     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
667     msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
668     raise CrabException(msg)
669 gutsche 1.72
670 slacapra 1.61 ## create tar-ball with ML stuff
671 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script that prepares the
    execution environment for the job 'nj'.

    The returned shell text is assembled from, in order:
      - a middleware switch (LCG/CAF/LSF, OSG, SGE, ARC) that embeds the
        output of wsSetupCMSLCGEnvironment_ / wsSetupCMSOSGEnvironment_;
      - creation of the CMSSW project area and the scram runtime setup;
      - a check on the number of wrapper arguments;
      - the dataset variables and the parameter-set handling of the job.
    """
    psetName = 'pset.py'

    # JobType-independent part: one branch per supported middleware.
    chunks = [
        '\n#Written by cms_cmssw::wsSetupEnvironment\n',
        'echo ">>> setup environment"\n',
        'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n',
        self.wsSetupCMSLCGEnvironment_(),
        'elif [ $middleware == OSG ]; then\n',
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
        self.wsSetupCMSOSGEnvironment_(),
        # SGE and ARC use the same setup text as the LCG branch
        'elif [ $middleware == SGE ]; then\n',
        self.wsSetupCMSLCGEnvironment_(),
        'elif [ $middleware == ARC ]; then\n',
        self.wsSetupCMSLCGEnvironment_(),
        'fi\n',
    ]

    # JobType-specific part: create the CMSSW area and load its runtime.
    scram = self.scram.commandName()
    chunks += [
        '\n\n',
        'echo ">>> specific cmssw setup environment:"\n',
        'echo "CMSSW_VERSION = ' + self.version + '"\n',
        scram + ' project CMSSW ' + self.version + '\n',
        'status=$?\n',
        'if [ $status != 0 ] ; then\n',
        ' echo "ERROR ==> CMSSW ' + self.version + ' not found on `hostname`" \n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
        'cd ' + self.version + '\n',
        'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n',
        'if [ $? != 0 ] ; then\n',
        ' echo "ERROR ==> Problem with the command: "\n',
        ' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
    ]

    # Argument handling: the wrapper needs at least self.argsList arguments.
    chunks += [
        "\n",
        "## number of arguments (first argument always jobnumber)\n",
        "\n",
        "if [ $nargs -lt " + str(self.argsList) + " ]\n",
        "then\n",
        " echo 'ERROR ==> Too few arguments' +$nargs+ \n",
        ' job_exit_code=50113\n',
        " func_exit\n",
        "fi\n",
        "\n",
    ]

    # Job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        chunks += [
            '\n',
            'DatasetPath=' + self.datasetPath + '\n',
            'PrimaryDataset=' + self.primaryDataset + '\n',
            'DataTier=' + self.dataTier + '\n',
            'ApplicationFamily=cmsRun\n',
        ]
    else:
        chunks += [
            'DatasetPath=MCDataTier\n',
            'PrimaryDataset=null\n',
            'DataTier=null\n',
            'ApplicationFamily=MCDataTier\n',
        ]

    if self.pset != None:
        # Copy the job configuration locally and rename it to psetName.
        pset = os.path.basename(job.configFilename())
        chunks += [
            '\n',
            'cp $RUNTIME_AREA/' + pset + ' .\n',
            'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n',
            'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n',
            'echo "PreserveSeeds: <$PreserveSeeds>"\n',
            'echo "IncrementSeeds:<$IncrementSeeds>"\n',
            'mv -f ' + pset + ' ' + psetName + '\n',
        ]
    else:
        # NOTE(review): nesting of the two exports under this else was
        # reconstructed from a listing without indentation - confirm.
        chunks.append('\n')
        if self.AdditionalArgs:
            chunks.append('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            chunks.append('export MaxEvents=%s\n' % str(self.NumEvents))

    return ''.join(chunks)
774 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job wrapper script that unpacks the user
    tarball and puts $RUNTIME_AREA in front of PYTHONPATH.

    When self.tgzNameWithPath is not an existing file only the
    "Written by" header comment is returned.  The argument 'nj' is
    accepted for interface compatibility and is not used.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
    txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
    # The exit status must be captured right after tar: any command
    # placed in between (such as the debug listing below) would
    # overwrite $? and hide an untar failure.
    txt += 'untar_status=$? \n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
809 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the part of the job wrapper script that moves the unpacked
    software directories (lib/, module/, optionally src/ and the
    additional input files) from $RUNTIME_AREA into the current
    directory, extends PYTHONPATH and, when a parameter set is
    configured, computes PSETHASH with edmConfigHash.

    The argument 'nj' is not used.
    """
    pieces = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    if self.dataExist == True:
        pieces.append('rm -r src/ \n')
        pieces.append('mv $RUNTIME_AREA/src/ . \n')

    # Additional user files are shipped flat, hence only the basename.
    for extra in self.additional_inbox_files:
        pieces.append('mv $RUNTIME_AREA/' + os.path.basename(extra) + ' . \n')

    pieces += [
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ]

    if self.pset != None:
        psetName = 'pset.py'
        pieces.append('\n')
        # NOTE(review): extent of the debug branch was reconstructed
        # from a listing without indentation - confirm.
        if self.debug_wrapper == 1:
            pieces += [
                'echo "***** cat ' + psetName + ' *********"\n',
                'cat ' + psetName + '\n',
                'echo "****** end ' + psetName + ' ********"\n',
                '\n',
                'echo "***********************" \n',
                'which edmConfigHash \n',
                'echo "***********************" \n',
            ]
        pieces += [
            'edmConfigHash ' + psetName + ' \n',
            'PSETHASH=`edmConfigHash ' + psetName + '` \n',
            'echo "PSETHASH = $PSETHASH" \n',
            # temporary fix for noEdm files: fall back to a literal "null"
            'if [ -z "$PSETHASH" ]; then \n',
            ' export PSETHASH=null\n',
            'fi \n',
            '\n',
        ]

    return ''.join(pieces)
862 slacapra 1.1
863 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job payload: "sh " when a
    user script (self.scriptExe) is set, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
869 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the command given by
    executableName(): the user script followed by the job number and
    the additional arguments when self.scriptExe is set, otherwise the
    framework job report and parameter-set options.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
875 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL input sandbox.

    The tarball and the arguments file are listed only when they exist
    on disk; the job script (job directory + self.scriptName) is always
    appended last.  The argument 'nj' is not used.
    """
    optional = (self.tgzNameWithPath, self.argsFile)
    inp_box = [path for path in optional if os.path.isfile(path)]
    inp_box.append(common.work_space.jobDir() + self.scriptName)
    return inp_box
887    
def outputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile() together
    with the job number nj + 1 as a string.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
899    
900    
def wsRenameOutput(self, nj):
    """
    Return the part of the job wrapper script that renames the files
    produced by the job.

    For every entry of self.output_file the script moves the file to
    its numbered name (as returned by numberFile(name, '$NJob')) and
    links it back under the original name in $RUNTIME_AREA; a missing
    file sets job_exit_code=60302.  The numbered names, prefixed with
    $SOFTWARE_DIR/, are exported to the script as the comma separated
    shell variable 'file_list'.  The argument 'nj' is not used.
    """
    parts = [
        '\n#Written by cms_cmssw::wsRenameOutput\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
    ]
    if self.debug_wrapper == 1:
        parts.append('ls -Al\n')
    parts.append('\n')

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$NJob')
        parts.append('\n')
        parts.append('# check output file\n')
        parts.append('if [ -e ./' + fileWithSuffix + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be
            # moved to $RUNTIME_AREA
            parts.append(' mv ' + fileWithSuffix + ' ' + output_file_num + '\n')
            parts.append(' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n')
        else:
            parts.append(' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n')
            parts.append(' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n')
        parts.append('else\n')
        parts.append(' job_exit_code=60302\n')
        parts.append(' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            parts.append(' if [ $middleware == OSG ]; then \n')
            parts.append(' echo "prepare dummy output file"\n')
            parts.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n')
            parts.append(' fi \n')
        parts.append('fi\n')

    file_list = [numberFile('$SOFTWARE_DIR/' + name, '$NJob')
                 for name in self.output_file]

    # str.join replaces the deprecated string.join module function
    parts.append('file_list="' + ','.join(file_list) + '"\n')
    parts.append('\n')
    parts.append('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    parts.append('echo ">>> current directory content:"\n')
    if self.debug_wrapper == 1:
        parts.append('ls -Al\n')
    parts.append('\n')
    parts.append('cd $RUNTIME_AREA\n')
    parts.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(parts)
947    
def getRequirements(self, nj=None):
    """
    Return the job requirements expression to add to the jdl files.

    The expression is the ' && ' conjunction of: a software tag
    membership test for self.version and one for self.executable_arch
    (each only when the attribute is non-empty), the outbound IP
    requirement and, for the "glite"/"glitecoll" schedulers, a
    constraint on the CE state (also accepting "Special" when
    GRID.use_cream is set in the configuration).

    The argument 'nj' is not used; its default is None instead of a
    shared mutable list.
    """
    tag_tail = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'

    # Collect the clauses first and join them afterwards, so that the
    # expression never starts with a dangling ' && ' when version (or
    # both version and architecture) are empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + tag_tail)
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + tag_tail)
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    req = ' && '.join(clauses)

    if common.scheduler.name() in ("glitecoll", "glite"):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            req += ' && (other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")'
        else:
            req += ' && other.GlueCEStateStatus == "Production" '

    return req
971 gutsche 1.3
def configFilename(self):
    """Return the config file name: the value of self.name() plus '.py'."""
    base = self.name()
    return base + '.py'
975 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job wrapper script that prepares the CMS
    software environment on OSG: it exports SCRAM_ARCH from
    self.executable_arch and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh with self.version as
    argument, setting job_exit_code=10020 when that file is missing.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
999 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job wrapper script that prepares the CMS
    software environment on LCG-like middleware: it exports SCRAM_ARCH
    and BUILD_ARCH from self.executable_arch and sources
    $VO_CMS_SW_DIR/cmsset_default.sh, setting a dedicated
    job_exit_code (10031, 10020 or 10032) for each failure.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        # the software area variable must be defined on the worker node
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1033 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job wrapper script that modifies the
    FrameworkJobReport by running ModifyJobReport.py.

    An empty string is returned unless self.copy_data equals 1.  When
    self.publish_data equals 1 the script also defines
    ProcessedDataset (from self.processedDataset) and passes a
    UserProcessedDataset argument to ModifyJobReport.py.  The argument
    'nj' is not used.
    """
    if not (self.copy_data == 1):
        return ''

    parts = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list'
    if self.publish_data == 1:
        parts.append('ProcessedDataset=' + self.processedDataset + '\n')
        parts.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    # The command is first echoed to the log and then executed.
    command = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py ' + str(args)
    parts += [
        'echo "' + command + '"\n',
        command + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ]
    return ''.join(parts)
1079 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job wrapper script that parses the
    FrameworkJobReport with parseCrabFjr.py to obtain the executable
    exit status, reports it (ExeExitCode) and stores it in
    job_exit_code.

    self.debugWrap is appended to the dashboard parsing command; when
    self.debug_wrapper equals 1 the output of that command is echoed.
    """
    script = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        script.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    script += [
        ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # The former comparison of the input file list with the files
        # listed in the report (exit status 30001) is disabled: it can
        # no longer work given the changes to the job arguments.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ]
    return ''.join(script)
1152    
def setParam_(self, param, value):
    """Store 'value' under the key 'param' in the self._params dictionary."""
    params = self._params
    params[param] = value
1155    
def getParams(self):
    """Return the self._params dictionary itself (not a copy)."""
    params = self._params
    return params
1158 gutsche 1.8
def outList(self, list=False):
    """
    Return the part of the job wrapper script that exports the list of
    files expected in the output sandbox as the shell variable
    'filesToCheck'.

    The expected files are the numbered names (numberFile(name,
    '$NJob')) of self.output_file_sandbox, preceded by those of
    self.output_file when self.return_data equals 1, followed by the
    job stdout and stderr.  A warning is logged when no output file
    has been defined.

    When 'list' is true, self.output_file is returned instead of the
    script text (the warning is still logged).
    """
    if len(self.output_file) <= 0:
        msg = "WARNING: no output files name have been defined!!\n"
        msg += "\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    if self.return_data == 1:
        expected = self.output_file + self.output_file_sandbox
    else:
        expected = self.output_file_sandbox

    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    # stdout and stderr of the job are always part of the sandbox
    listOutFiles.append('CMSSW_$NJob.stdout')
    listOutFiles.append('CMSSW_$NJob.stderr')

    # str.join replaces the deprecated string.join module function
    joined = ' '.join(listOutFiles)
    txt = ''
    txt += 'echo ">>> list of expected files on output sandbox"\n'
    txt += 'echo "output files: ' + joined + '"\n'
    txt += 'filesToCheck="' + joined + '"\n'
    txt += 'export filesToCheck\n'

    if list:
        return self.output_file
    return txt