ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.318
Committed: Wed Jul 8 09:12:50 2009 UTC (15 years, 9 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.317: +39 -11 lines
Log Message:
added the check of length of datasetname to publish

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_exceptions import *
3     from crab_util import *
4     import common
5     import Scram
6 spiga 1.269 from Splitter import JobSplitter
7 slacapra 1.1
8 spiga 1.293 from IMProv.IMProvNode import IMProvNode
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure the CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' option names to string values
    ncjobs      -- number of jobs requested to be created
    skip_blocks -- forwarded to DataDiscovery by DataDiscoveryAndLocation()
    isNew       -- when true and a PSet is configured, ModifyPset() is run

    Raises CrabException on an unparsable or unsupported CMSSW version,
    on missing/invalid configuration and on missing input files.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    server = self.cfg_params.get('CRAB.server_name', None)
    size = 9.5
    if server or common.scheduler.name().upper() in ['LSF', 'CAF']:
        size = 99999
    ### D.S.
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tar.gz'
    self.tar_name = 'default.tar'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))

    # The version string is split on '_'; fields 1..3 are taken as
    # major, minor and patch.  Major and minor must be integers because
    # they are compared numerically below and in wsSetupEnvironment();
    # additional trailing fields are accepted and ignored.
    version_fields = str(self.version).split('_')
    try:
        self.CMSSW_major = int(version_fields[1])
        self.CMSSW_minor = int(version_fields[2])
        patch_field = version_fields[3]
    except (IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + str(self.version) + " for major and minor release number!"
        raise CrabException(msg)
    try:
        self.CMSSW_patch = int(patch_field)
    except ValueError:
        # A non-numeric patch field is kept verbatim instead of being rejected.
        self.CMSSW_patch = patch_field

    if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5):
        msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   1.X dropped: drop support for running .cfg on WN
    #   2.0 dropped: drop all support for cfg here and in writeCfg
    #   2.0 dropped: Recheck the random number seed support

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    dataset_option = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + dataset_option)

    if dataset_option == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif dataset_option.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = dataset_option
        self.selectNoInput = 0
        path_fields = self.datasetPath.split("/")
        if len(path_fields) < 3:
            # Previously surfaced as a bare IndexError.
            raise CrabException("Error: malformed datasetpath: " + dataset_option)
        self.primaryDataset = path_fields[1]
        # NOTE(review): for a '/A/B/C' path this is field 'B'; kept as in
        # the original because the job script exports it as DataTier.
        self.dataTier = path_fields[2]

    self.dataTiers = []

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    # the fjr report is added by default
    self.output_file_sandbox = [self.fjrFileName]

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    output_option = cfg_params.get('CMSSW.output_file', None)
    if output_option:
        self.output_file = [x.strip() for x in output_option.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # scriptExe is None (not '') when the option is absent, so test its
    # truth value; comparing with '' could never detect the missing script.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries such as a trailing comma
                continue
            dirname = ''
            if not pattern[0] == "/":
                dirname = "."
            if pattern.find("*") > -1:
                matches = glob.glob(os.path.join(dirname, pattern))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: " + pattern)
            else:
                matches = [pattern]
            for match in matches:
                if not os.path.exists(match):
                    raise CrabException("Additional input file not found: " + match)
                self.additional_inbox_files.append(match.strip())
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string: store the stripped value.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        for seed in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seed.strip())
    if 'CMSSW.increment_seeds' in cfg_params:
        for seed in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seed.strip())

    self.firstRun = cfg_params.get('CMSSW.first_run', None)

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if 'USER.publish_data_name' not in cfg_params:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = cfg_params['USER.publish_data_name']
        #### check of length of datasetname to publish ####
        common.logger.debug("test 100 char limit on datasetname")
        user = getUserName()
        common.logger.debug("user = " + user)
        len_user_name = len(user)
        common.logger.debug("len_user_name = " + str(len_user_name))
        len_processedDataset = len(self.processedDataset)
        common.logger.debug("processedDataset " + self.processedDataset)
        common.logger.debug("len_processedDataset = " + str(len_processedDataset))
        if self.datasetPath is not None:
            len_primary = len(self.primaryDataset)
            common.logger.debug("primaryDataset = " + self.primaryDataset)
            common.logger.debug("len_primary = " + str(len_primary))
            max_len = 59 - len_user_name - len_primary
        else:
            # '//' keeps the limit an integer on every Python version.
            max_len = (59 - len_user_name) // 2
        if len_processedDataset > max_len:
            raise CrabException("Warning: publication name too long. USER.publish_data_name has to be < " + str(max_len) + " characters")

    self.conf = {}
    self.conf['pubdata'] = None
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0       # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}       # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = [] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end
    self.conf['blockSites'] = blockSites

    ## Select Splitting
    splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

    if self.selectNoInput:
        if self.pset is None:
            self.algo = 'ForScript'
        else:
            self.algo = 'NoInput'
            self.conf['managedGenerators'] = self.managedGenerators
            self.conf['generator'] = self.generator
    elif splitByRun == 1:
        self.algo = 'RunBased'
    else:
        self.algo = 'EventBased'

    splitter = JobSplitter(self.cfg_params, self.conf)
    self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew and self.pset is not None:
        self.ModifyPset()

    ## Prepare inputSandbox TarBall (only the first time)
    self.tarNameWithPath = self.getTarBall(self.executable)
257 spiga 1.293
258    
def ModifyPset(self):
    """
    Write the job configuration derived from the user PSet and register
    the output files that the PSet declares.

    Uses PsetManipulator on self.pset, writes the result to
    self.configFilename() and may append TFileService / PoolOutputModule
    file names to self.output_file, depending on the CMSSW.* options.

    Raises CrabException; a CrabException raised while manipulating the
    PSet is logged and replaced by a generic one.
    """
    import sys
    import PsetManipulator as pp
    PsetEdit = pp.PsetManipulator(self.pset)
    try:
        # Add FrameworkJobReport to parameter-set, set max events.
        # Reset later for data jobs by writeCFG which does all modifications
        PsetEdit.maxEvent(1)
        PsetEdit.skipEvent(0)
        PsetEdit.psetWriter(self.configFilename())

        ## If present, add TFileService to output files
        if not int(self.cfg_params.get('CMSSW.skip_TFileService_output', 0)):
            tfsOutput = PsetEdit.getTFileService()
            if tfsOutput:
                if tfsOutput in self.output_file:
                    common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
                else:
                    self.output_file.append(tfsOutput)
                    common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")

        ## If present and requested, add PoolOutputModule to output files
        edmOutput = PsetEdit.getPoolOutputModule()
        if int(self.cfg_params.get('CMSSW.get_edm_output', 0)):
            if edmOutput:
                if edmOutput in self.output_file:
                    common.logger.debug("Output from PoolOutputModule "+edmOutput+" already in output files")
                else:
                    self.output_file.append(edmOutput)
                    common.logger.info("Adding "+edmOutput+" (from PoolOutputModule) to list of output files")
        # not required: check anyhow if present, to avoid accidental T2 overload
        elif edmOutput and (edmOutput not in self.output_file):
            msg = "ERROR: a PoolOutputModule is present in your ParameteSet %s \n"%self.pset
            msg +=" but the file produced ( %s ) is not in the list of output files\n"%edmOutput
            msg += "WARNING: please remove it. If you want to keep it, add the file to output_files or use CMSSW.get_edm_output\n"
            if int(self.cfg_params.get('CMSSW.ignore_edm_output', 0)):
                msg +=" CMSSW.ignore_edm_output==True : Hope you know what you are doing...\n"
                common.logger.info(msg)
            else:
                raise CrabException(msg)

        if PsetEdit.getBadFilesSetting():
            msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
            common.logger.info(msg)

    except CrabException:
        # sys.exc_info() instead of "except X, e" / "except X as e" so the
        # same source parses on old Python 2 releases and on Python 3.
        caught = sys.exc_info()[1]
        common.logger.info(str(caught))
        msg = 'Error while manipulating ParameterSet (see previous message, if any): exiting...'
        raise CrabException(msg)
313    
314 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query the data discovery and data location services for self.datasetPath.

    Sets self.pubdata, self.filesbyblock, self.maxEvents and
    self.conf['pubdata'].

    Returns a dictionary with one key per entry of self.filesbyblock; the
    value is what the location service reported for that key, or an empty
    list when it reported nothing.

    Raises CrabException when discovery or location fails, or when there
    is no block at all.
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.info("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # All three failures are reported identically.
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.conf['pubdata'] = self.pubdata

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    # One entry per block, in the key set of filesbyblock; blocks unknown
    # to the location service get an empty list.
    located = dataloc.getSites()
    sites = {}
    for block in self.filesbyblock.keys():
        if block in located:
            sites[block] = located[block]
        else:
            sites[block] = []

    # NOTE(review): 'sites' has a key for every block, so this only fires
    # when filesbyblock itself is empty - confirm whether blocks that all
    # have an empty site list should be rejected here as well.
    if len(sites) == 0:
        msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
        msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
        msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
        raise CrabException(msg)

    # screen output
    common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
382 ewv 1.131
383 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Register the jobs computed by the splitter.

    jobParams  -- ignored: the per-job argument lists are always taken
                  from self.dict['args']
    firstJobID -- offset added to the job index; job numbers are
                  index + int(firstJobID) + 1

    For every job the arguments are collected for the arguments XML file
    and the destination is stored through common._db.updateJob_(); the
    tar file is finally compressed with zipTarFile().

    Raises CrabException when the splitter produced zero jobs.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Ask to split "+str(njobs)+" jobs: aborting")

    # Make sure there is an (empty) argument entry for each job, padding
    # only what is missing so the splitter's list does not grow needlessly.
    while len(jobParams) < njobs:
        jobParams.append("")

    # Loop invariants: one mapper instance serves every job.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()
    concString = ' '
    argsFileExists = os.path.exists(self.argsFile)

    listID = []
    listField = []
    listDictions = []
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        listID.append(jobNumber)
        jobArgs = jobParams[index]
        str_argu = str(jobNumber)
        if len(jobArgs):
            argu = {'JobID': jobNumber}
            for pos in range(len(jobArgs)):
                argu[self.dict['params'][pos]] = jobArgs[pos]
                if len(jobArgs) == 1:
                    self.NumEvents = jobArgs[pos]
            # just for debug
            str_argu += concString.join(jobArgs)
            listDictions.append(argu)
        destination = self.jobDestination[index]
        listField.append({'arguments': str(jobNumber),
                          'dlsDestination': destination})
        msg = "Job %s Arguments: %s\n"%(str(jobNumber),str_argu)
        msg += "\t Destination: %s "%(str(destination))
        SEDestination = [cms_se[dest] for dest in destination]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)

    # write xml
    if len(listDictions):
        if not argsFileExists:
            self.CreateXML()
        self.addEntry(listDictions)
        self.addXMLfile()
    common._db.updateJob_(listID,listField)
    self.zipTarFile()
    return
434 ewv 1.313
def addXMLfile(self):
    """
    Append self.argsFile to the tar file self.tarNameWithPath, stored
    under its base name.

    Raises CrabException when the archive cannot be opened or updated.
    """
    import sys
    import tarfile
    try:
        tar = tarfile.open(self.tarNameWithPath, "a")
        # Nested try/finally (rather than try/except/finally) keeps the
        # archive closed on failure and still parses on old Python 2.
        try:
            tar.add(self.argsFile, os.path.basename(self.argsFile))
        finally:
            tar.close()
    except (IOError, tarfile.TarError):
        exc = sys.exc_info()[1]
        msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
        msg += str(exc)
        raise CrabException(msg)
450 ewv 1.313
def CreateXML(self):
    """
    Create self.argsFile containing only the root node named
    self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    # Close explicitly so the content is on disk before the file is
    # re-read or added to the tar file.
    try:
        outfile.write(str(result))
    finally:
        outfile.close()
    return
457 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add one 'Job' entry per dictionary of listDictions to the xml file
    self.argsFile; the dictionary items are passed to IMProvNode as
    keyword arguments.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    # Close explicitly instead of relying on garbage collection.
    try:
        outfile.write(str(improvDoc))
    finally:
        outfile.close()
    return
473 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitter result stored in self.dict
    """
    splitting = self.dict
    return splitting['njobs']
476 gutsche 1.3
def getTarBall(self, exe):
    """
    Return the path of the tar file with lib and exe.

    The path is stored in self.tarNameWithPath; buildTar_(exe) is called
    only when no file exists at that path yet.
    """
    tarPath = common.work_space.pathForTgz() + self.tar_name
    self.tarNameWithPath = tarPath
    if not os.path.exists(tarPath):
        # Prepare a tar file with user binaries.
        self.buildTar_(exe)
        return self.tarNameWithPath.strip()
    return self.tarNameWithPath
489 slacapra 1.1
def buildTar_(self, executable):
    """
    Create the tar file self.tarNameWithPath with the user area content.

    Nothing is done when the release top is empty or equal to the user
    Scram area.  Otherwise the archive receives: the user executable when
    it is not part of the release, the 'lib' and 'module' directories,
    every directory named 'data' found below 'src', the job configuration
    (when a PSet is used), crab.cfg (best effort), the ProdCommon/WMCore
    and CRAB helper files from $CRABDIR and the additional input files.

    Raises CrabException when the executable is not found or the archive
    cannot be written.
    """
    import sys

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tarNameWithPath, "w")
        # Inner try/finally: the archive is closed on every exit path.
        try:
            ## First find the executable
            if (self.executable != ''):
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            tar.dereference = True
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug("lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)
            tar.dereference = False

            ## Now check if any data dir(s) is present
            self.dataExist = False
            srcDir = swArea+"/src"
            # (relative path below src, last path component)
            todo_list = [(i, i) for i in os.listdir(srcDir)]
            while len(todo_list):
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(srcDir+"/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(srcDir+"/"+entry)]
                    if name == 'data':
                        self.dataExist = True
                        common.logger.debug("data "+entry+" to be tarred")
                        tar.add(srcDir+"/"+entry, "src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file, self.configFilename())

            # crab.cfg is shipped on a best-effort basis: a failure here
            # must not abort the creation of the archive.
            try:
                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
                tar.add(crab_cfg_file, 'crab.cfg')
            except Exception:
                common.logger.debug("crab.cfg not added to "+self.tarNameWithPath)

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools',
                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage',
                           'WMCore/__init__.py','WMCore/Algorithms']
            for needed in neededStuff:
                tar.add(prodcommonPath+needed, prodcommonDir+needed)

            ##### ML stuff
            ML_file_list = ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            path = os.environ['CRABDIR'] + '/python/'
            for mlFile in ML_file_list:
                tar.add(path+mlFile, mlFile)

            ##### Utils
            Utils_file_list = ['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
            for utilFile in Utils_file_list:
                tar.add(path+utilFile, utilFile)

            ##### AdditionalFiles
            tar.dereference = True
            for extraFile in self.additional_inbox_files:
                tar.add(extraFile, extraFile.split('/')[-1])
            tar.dereference = False
            common.logger.log(10-1,"Files in "+self.tarNameWithPath+" : "+str(tar.getnames()))
        finally:
            tar.close()
    except (IOError, tarfile.TarError):
        # sys.exc_info() keeps the handler valid on Python 2 and 3 alike.
        exc = sys.exc_info()[1]
        msg = 'Could not create tar-ball %s \n'%self.tarNameWithPath
        msg += str(exc)
        raise CrabException(msg)
606 spiga 1.300
def zipTarFile(self):
    """
    Compress self.tarNameWithPath into self.tgzNameWithPath with gzip.

    When the compressed file is larger than self.MaxTarBallSize (in MB) a
    CrabException is raised and the tar file is left in place; otherwise
    the uncompressed tar file is removed.
    """
    import gzip
    import shutil

    # Both handles are closed even if the copy fails half way.
    f_in = open(self.tarNameWithPath, 'rb')
    try:
        f_out = gzip.open(self.tgzNameWithPath, 'wb')
        try:
            # Chunked binary copy; iterating a binary file "by line" would
            # split it at arbitrary newline bytes.
            shutil.copyfileobj(f_in, f_out)
        finally:
            f_out.close()
    finally:
        f_in.close()

    tarballinfo = os.stat(self.tgzNameWithPath)
    if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
        msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
              +'MB input sandbox limit \n'
        msg += ' and not supported by the direct GRID submission system.\n'
        msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
        msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
        raise CrabException(msg)
    os.remove(self.tarNameWithPath)
628 gutsche 1.72
629 slacapra 1.61 ## create tar-ball with ML stuff
630 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    nj -- index into common.job_list; the job is only used to obtain the
          configuration file name when a PSet is configured.
    """
    # FUTURE: Drop support for .cfg when possible
    # The version fields are converted here so the numeric comparison is
    # correct whether they are stored as strings or as integers.
    major = int(self.CMSSW_major)
    minor = int(self.CMSSW_minor)
    if (major >= 2 and minor >= 1) or (major >= 3):
        psetName = 'pset.py'
    else:
        psetName = 'pset.cfg'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    #Setup SGE Environment
    txt += 'elif [ $middleware == SGE ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'elif [ $middleware == ARC ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    # '\\`' emits a backslash followed by a backquote in the shell script
    txt += ' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if (self.datasetPath):
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        txt += 'PrimaryDataset='+self.primaryDataset +'\n'
        txt += 'DataTier='+self.dataTier+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset != None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
    else:
        # script-only job: export the user arguments and the event count
        txt += '\n'
        if self.AdditionalArgs: txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
        if int(self.NumEvents) != 0: txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
    return txt
736 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the job-wrapper shell fragment that unpacks the user tarball.

    When self.tgzNameWithPath is an existing file, the fragment untars
    $RUNTIME_AREA/<tarball basename>, sets job_exit_code and calls
    func_exit if tar fails, and prepends $RUNTIME_AREA to PYTHONPATH.
    When the file does not exist only the header comment is returned.

    nj is accepted for interface compatibility and is not used.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
    txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
    # Capture the exit status of tar *before* any other command runs:
    # the optional debug listing below would otherwise overwrite $?
    # and a failed untar would go unnoticed.
    txt += 'untar_status=$? \n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
771 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the job-wrapper shell fragment that installs the user's
    build products.

    The fragment replaces lib/ and module/ (and src/ when self.dataExist
    is true) with the copies found in $RUNTIME_AREA, moves every
    additional inbox file into the current directory, prepends
    $RUNTIME_AREA to PYTHONPATH and, when self.pset is set, computes
    PSETHASH with edmConfigHash.

    nj is accepted for interface compatibility and is not used.
    """
    script = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]
    if self.dataExist == True:
        script.append('rm -r src/ \n')
        script.append('mv $RUNTIME_AREA/src/ . \n')
    if len(self.additional_inbox_files) > 0:
        script.extend(['mv $RUNTIME_AREA/' + os.path.basename(path) + ' . \n'
                       for path in self.additional_inbox_files])

    script.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset != None:
        # FUTURE: drop support for .cfg when possible.
        # Python configuration is used from CMSSW 2_1 onwards.
        python_cfg = (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3)
        if python_cfg:
            psetName = 'pset.py'
        else:
            psetName = 'pset.cfg'

        script.append('\n')
        # NOTE(review): the nesting of this debug block was reconstructed
        # from a listing without indentation -- confirm that the
        # "which edmConfigHash" lines belong inside it.
        if self.debug_wrapper == 1:
            script.extend([
                'echo "***** cat ' + psetName + ' *********"\n',
                'cat ' + psetName + '\n',
                'echo "****** end ' + psetName + ' ********"\n',
                '\n',
                'echo "***********************" \n',
                'which edmConfigHash \n',
                'echo "***********************" \n',
            ])
        if python_cfg:
            script.append('edmConfigHash ' + psetName + ' \n')
            script.append('PSETHASH=`edmConfigHash ' + psetName + '` \n')
        else:
            script.append('PSETHASH=`edmConfigHash < ' + psetName + '` \n')
        script.append('echo "PSETHASH = $PSETHASH" \n')
        # Temporary fix for noEdm files: fall back to "null" when no
        # hash could be computed.
        script.append('if [ -z "$PSETHASH" ]; then \n')
        script.append(' export PSETHASH=null\n')
        script.append('fi \n')
        script.append('\n')

    return ''.join(script)
831 slacapra 1.1
832 ewv 1.131
def executableName(self):
    """
    Return the command the job wrapper has to run: "sh " when a user
    script (self.scriptExe) is configured, self.executable otherwise.
    """
    return "sh " if self.scriptExe else self.executable
838 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script the arguments are the script name followed by
    $NJob and $AdditionalArgs; otherwise they are the job-report option
    (-j) and the parameter-set option (-p) for cmsRun.
    """
    # FUTURE: this function tests the CMSSW version; it can be
    # simplified once support for old versions is dropped.
    if self.scriptExe:
        return self.scriptExe + " $NJob $AdditionalArgs"

    # Type of config file depends on the CMSSW version.
    # NOTE(review): the test here is "major >= 2" while wsBuildExe and
    # configFilename switch to python only from 2_1 -- confirm which
    # rule is intended for CMSSW 2_0_x.
    if self.CMSSW_major >= 2:
        pset_option = " -p pset.py"
    else:
        pset_option = " -p pset.cfg"
    return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml" + pset_option
852 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL input sandbox:
    the tarball self.tgzNameWithPath (only if it exists) followed by
    the wrapper script located in the job directory.

    nj is not used.
    """
    if os.path.isfile(self.tgzNameWithPath):
        sandbox = [self.tgzNameWithPath]
    else:
        sandbox = []
    wrapper_script = common.work_space.jobDir() + self.scriptName
    sandbox.append(wrapper_script)
    return sandbox
862    
def outputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL output sandbox.

    Every user-declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile together
    with the 1-based job number str(nj + 1).
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
874    
875    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    For each name in self.output_file the fragment checks that the file
    exists, renames it to the job-numbered name given by numberFile and
    links it back under its original name in $RUNTIME_AREA; a missing
    file sets job_exit_code=60302.  The fragment also defines the shell
    variable file_list (comma-separated numbered names under
    $SOFTWARE_DIR) and finally changes directory to $RUNTIME_AREA.

    nj is not used.
    """
    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./' + fileWithSuffix + ' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be
            # moved to $RUNTIME_AREA
            txt += ' mv ' + fileWithSuffix + ' ' + output_file_num + '\n'
            txt += ' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        else:
            txt += ' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [numberFile('$SOFTWARE_DIR/' + fileWithSuffix, '$NJob')
                 for fileWithSuffix in self.output_file]

    # str.join instead of the deprecated string.join() module function,
    # which no longer exists in Python 3.
    txt += 'file_list="' + ','.join(file_list) + '"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
922    
def getRequirements(self, nj=[]):
    """
    Return the job requirements expression to add to the JDL file.

    The expression is the ' && ' conjunction of: a software-tag clause
    for self.version (if set), one for self.executable_arch (if set),
    the outbound-IP clause and, for the glite / glitecoll schedulers,
    a clause on the CE state (which also accepts "Special" when
    GRID.use_cream is configured).

    nj is accepted for interface compatibility and is not used.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the clauses and join them once, so that the expression
    # never starts with a dangling ' && ' when self.version is empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ("glitecoll", "glite"):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # The trailing blank is kept so the expression is unchanged
            # for configurations that already worked.
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
946 gutsche 1.3
def configFilename(self):
    """
    Return the config file name: self.name() with extension '.py' for
    CMSSW 2_1 and later, '.cfg' for older versions.
    """
    # FUTURE: cfg mode can be removed for CMSSW >= 2_1_x
    python_cfg = (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3)
    extension = '.py' if python_cfg else '.cfg'
    return self.name() + extension
954 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS
    environment on OSG.

    The fragment exports SCRAM_ARCH (self.executable_arch) and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh with self.version as
    argument; if that file is missing it sets job_exit_code=10020 and
    calls func_exit.
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
978 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS
    environment on LCG.

    The fragment exports SCRAM_ARCH and BUILD_ARCH
    (self.executable_arch) and sources
    $VO_CMS_SW_DIR/cmsset_default.sh.  It sets job_exit_code and calls
    func_exit when $VO_CMS_SW_DIR is not set (10031), the setup file is
    missing or empty (10020) or sourcing it fails (10032).
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1012 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job script that modifies the
    FrameworkJobReport.

    An empty string is returned unless self.copy_data == 1.  Otherwise
    the fragment chooses FOR_LFN from $StageOutExitStatus, runs
    ModifyJobReport.py on crab_fjr_$NJob.xml and, on success, replaces
    the report with NewFrameworkJobReport.xml; on failure it sets
    job_exit_code to 70500.  When self.publish_data == 1 the processed
    dataset name is passed along as well.

    nj is not used.
    """
    # NOTE(review): block nesting was reconstructed from a listing
    # without indentation -- everything below is assumed to be
    # conditional on copy_data == 1; confirm.
    if not (self.copy_data == 1):
        return ''

    modify_cmd = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '
    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH'

    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modify_cmd + args + '"\n',
        modify_cmd + args + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1058 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the job script that parses the
    FrameworkJobReport to obtain the executable exit status.

    The fragment runs parseCrabFjr.py on crab_fjr_$NJob.xml (when both
    files exist and are not empty), stores the reported code in
    executable_exit_status, and calls func_exit when that code is
    different from 0, 50115, 50117 and 30001.  It always records
    ExeExitCode in $RUNTIME_AREA/$repo and sets job_exit_code.
    """
    fjr = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input ' + fjr

    script = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s ' + fjr + ' ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        script.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    script.extend([
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat ' + fjr + '\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        # The original message repeated "not available." twice.
        ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x, hopefully to be removed asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # The verification of the list of processed input files (exit
        # code 30001) used to be generated here; it was disabled because
        # it can no longer work with the current job arguments.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])

    return ''.join(script)
1131    
def setParam_(self, param, value):
    """Store value under the key param in the self._params dictionary."""
    self._params[param] = value
1134    
def getParams(self):
    """Return the self._params dictionary itself (not a copy)."""
    return self._params
1137 gutsche 1.8
def outList(self,list=False):
    """
    Return the job-script fragment that declares the files expected in
    the output sandbox (shell variable filesToCheck).

    The expected files are the job-numbered sandbox output files (plus
    the regular output files when self.return_data == 1) followed by
    the job stdout and stderr.  An informational message is logged when
    no output file has been defined.

    list -- when true, self.output_file is returned instead of the
            script fragment.  (The name shadows the builtin; it is kept
            because it is part of the call interface.)
    """
    stdout = 'CMSSW_$NJob.stdout'
    stderr = 'CMSSW_$NJob.stderr'

    if len(self.output_file) <= 0:
        msg ="WARNING: no output files name have been defined!!\n"
        msg+="\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    if self.return_data == 1:
        expected = self.output_file + self.output_file_sandbox
    else:
        expected = self.output_file_sandbox
    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    listOutFiles.append(stdout)
    listOutFiles.append(stderr)

    # str.join instead of the deprecated string.join() module function,
    # which no longer exists in Python 3.
    names = ' '.join(listOutFiles)
    txt = 'echo ">>> list of expected files on output sandbox"\n'
    txt += 'echo "output files: ' + names + '"\n'
    txt += 'filesToCheck="' + names + '"\n'
    txt += 'export filesToCheck\n'

    if list:
        return self.output_file
    return txt