ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.321
Committed: Tue Jul 21 21:19:05 2009 UTC (15 years, 9 months ago) by ewv
Content type: text/x-python
Branch: MAIN
Changes since 1.320: +22 -20 lines
Log Message:
Solve case where multiple output modules present and case where modules present and not in endpath (Nicola)

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_exceptions import *
3     from crab_util import *
4     import common
5     import Scram
6 spiga 1.269 from Splitter import JobSplitter
7 slacapra 1.1
8 spiga 1.293 from IMProv.IMProvNode import IMProvNode
9 slacapra 1.105 import os, string, glob
10 slacapra 1.1
11     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure the CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' names to their string values
    ncjobs      -- number of jobs requested to be created (limits splitting)
    skip_blocks -- forwarded unchanged to DataDiscovery
    isNew       -- when true the ParameterSet is modified and the input
                   sandbox tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is
    older than 1_5, when a mandatory parameter is missing, or when a
    file named in the configuration does not exist.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    # use_server is a string in crab.cfg, so '0' must be converted before
    # it is used as a truth value.
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  int(self.cfg_params.get('CRAB.use_server', 0))
    size = 9.5
    if self.server or common.scheduler.name().upper() in ['LSF', 'CAF']:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: "+str(self.version))

    # The version is split on '_' into <name>, major, minor, patch and an
    # optional suffix.  Major and minor are compared with integers below
    # and in wsSetupEnvironment, so they have to be integers themselves.
    try:
        versionParts = self.version.split('_')
        self.CMSSW_major = int(versionParts[1])
        self.CMSSW_minor = int(versionParts[2])
    except (AttributeError, IndexError, ValueError):
        msg = "Cannot parse CMSSW version string: " + str(self.version) + " for major and minor release number!"
        raise CrabException(msg)
    # The patch field is only stored; keep it as text when it is not a
    # plain number so that such versions are not rejected.
    patch = '_'.join(versionParts[3:])
    try:
        self.CMSSW_patch = int(patch)
    except ValueError:
        self.CMSSW_patch = patch

    if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5):
        msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   1.X dropped: drop support for running .cfg on WN
    #   2.0 dropped: drop all support for cfg here and in writeCfg
    #   2.0 dropped: Recheck the random number seed support

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathParts = self.datasetPath.split("/")
        if len(pathParts) < 3:
            # report a malformed path as a CRAB error, not an IndexError
            raise CrabException("Error: datasetpath not defined ")
        self.primaryDataset = pathParts[1]
        self.dataTier = pathParts[2]

    self.dataTiers = []

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # its truth value; comparing with '' never detected the missing script.
    if self.datasetPath == None and self.pset == None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries such as a trailing comma
                continue
            if pattern.find("*") > -1:
                dirname = ''
                if not pattern[0] == "/":
                    dirname = "."
                matches = glob.glob(os.path.join(dirname, pattern))
                if len(matches) == 0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                matches = [pattern]
            for fileName in matches:
                if not os.path.exists(fileName):
                    raise CrabException("Additional input file not found: "+fileName)
                self.additional_inbox_files.append(fileName.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        # strip() returns a new string; store the stripped value
        self.preserveSeeds = [seed.strip() for seed in cfg_params['CMSSW.preserve_seeds'].split(',')]
    if 'CMSSW.increment_seeds' in cfg_params:
        self.incrementSeeds = [seed.strip() for seed in cfg_params['CMSSW.increment_seeds'].split(',')]

    self.firstRun = cfg_params.get('CMSSW.first_run', None)

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if 'USER.publish_data_name' not in cfg_params:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = cfg_params['USER.publish_data_name']
        #### check of length of datasetname to publish ####
        common.logger.debug("test 100 char limit on datasetname")
        user = getUserName()
        common.logger.debug("user = " + user)
        len_user_name = len(user)
        common.logger.debug("len_user_name = " + str(len_user_name))
        len_processedDataset = len(self.processedDataset)
        common.logger.debug("processedDataset " + self.processedDataset)
        common.logger.debug("len_processedDataset = " + str(len_processedDataset))
        if self.datasetPath != None:
            len_primary = len(self.primaryDataset)
            common.logger.debug("primaryDataset = " + self.primaryDataset)
            common.logger.debug("len_primary = " + str(len_primary))
            maxLength = 59 - len_user_name - len_primary
        else:
            # floor division keeps the integer limit on every interpreter
            maxLength = (59 - len_user_name) // 2
        if len_processedDataset > maxLength:
            raise CrabException("Warning: publication name too long. USER.publish_data_name has to be < " + str(maxLength) + " characters")

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0      # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}      # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end
    self.conf['blockSites'] = blockSites

    ## Select Splitting
    splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

    if self.selectNoInput:
        if self.pset == None:
            self.algo = 'ForScript'
        else:
            self.algo = 'NoInput'
            self.conf['managedGenerators'] = self.managedGenerators
            self.conf['generator'] = self.generator
    elif splitByRun == 1:
        self.algo = 'RunBased'
    else:
        self.algo = 'EventBased'

    # self.algo = 'LumiBased'
    splitter = JobSplitter(self.cfg_params, self.conf)
    self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    # NOTE(review): the listing lost its indentation; the tarball step is
    # placed inside "if isNew" as its "(only the first time)" comment says.
    if isNew:
        if self.pset != None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
258 spiga 1.293
259    
def ModifyPset(self):
    """
    Write the job ParameterSet and complete the list of output files.

    The ParameterSet is written with maxEvent 1 and skipEvent 0.  The
    TFileService output is added to self.output_file unless
    CMSSW.skip_TFileService_output is set.  PoolOutputModule files are
    added when CMSSW.get_edm_output is set; otherwise files that are
    produced but not listed are an error unless CMSSW.ignore_edm_output
    is set.

    Raises CrabException when the manipulation fails.
    """
    import sys
    import PsetManipulator as pp
    PsetEdit = pp.PsetManipulator(self.pset)
    try:
        # Add FrameworkJobReport to parameter-set, set max events.
        # Reset later for data jobs by writeCFG which does all modifications
        PsetEdit.maxEvent(1)
        PsetEdit.skipEvent(0)
        PsetEdit.psetWriter(self.configFilename())

        ## If present, add TFileService to output files
        if not int(self.cfg_params.get('CMSSW.skip_TFileService_output', 0)):
            tfsOutput = PsetEdit.getTFileService()
            if tfsOutput:
                if tfsOutput in self.output_file:
                    common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
                else:
                    self.output_file.append(tfsOutput)
                    common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")

        edmOutput = PsetEdit.getPoolOutputModule()
        if int(self.cfg_params.get('CMSSW.get_edm_output', 0)):
            # If present and requested, add PoolOutputModule to output files
            if edmOutput:
                for outputFile in edmOutput:
                    if outputFile in self.output_file:
                        # report the single file: edmOutput is the whole
                        # list and cannot be concatenated to a string
                        common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
                    else:
                        self.output_file.append(outputFile)
                        common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
        elif edmOutput:
            # not requested, check anyhow to avoid accidental T2 overload
            missedFiles = [outputFile for outputFile in edmOutput
                           if outputFile not in self.output_file]
            if missedFiles:
                msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
                msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
                msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
                if int(self.cfg_params.get('CMSSW.ignore_edm_output', 0)):
                    msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
                    common.logger.info(msg)
                else:
                    raise CrabException(msg)

        if PsetEdit.getBadFilesSetting():
            msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
            common.logger.info(msg)

    except CrabException:
        # sys.exc_info() gives the exception without version-specific syntax
        common.logger.info(str(sys.exc_info()[1]))
        msg = 'Error while manipulating ParameterSet (see previous message, if any): exiting...'
        raise CrabException(msg)
316    
317 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query data discovery and data location for self.datasetPath.

    Stores the discovery object in self.pubdata and self.conf['pubdata'],
    the files grouped by block in self.filesbyblock and the number of
    available events in self.maxEvents.

    Returns a dictionary with one entry per block of self.filesbyblock
    holding the sites reported for that block ([] when none is reported).

    Raises CrabException when discovery or location fails.
    """
    import sys
    import DataDiscovery
    import DataLocation
    common.logger.log(10-1, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.info("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError):
        # the three failures are reported in exactly the same way
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.conf['pubdata'] = self.pubdata

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError:
        ex = sys.exc_info()[1]
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    # One entry per known block; blocks without a reported location get [].
    unsorted_sites = dataloc.getSites()
    sites = {}
    for block in self.filesbyblock.keys():
        if block in unsorted_sites:
            sites[block] = unsorted_sites[block]
        else:
            sites[block] = []

    # NOTE(review): sites has one key per block, so this only triggers when
    # there are no blocks at all, not when every block lacks a location.
    if len(sites) == 0:
        msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
        msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
        msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
        raise CrabException(msg)

    # screen output
    common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
385 ewv 1.131
386 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Register the jobs computed by the splitter.

    jobParams  -- ignored; the arguments are read from self.dict['args']
    firstJobID -- offset added to the job index; jobs are numbered from
                  firstJobID + 1

    For each job the argument dictionary is collected for the arguments
    XML file and the job id and destination are passed to
    common._db.updateJob_.

    Raises CrabException when there are no jobs, or more than 500 jobs
    without server mode.
    """
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Ask to split "+str(njobs)+" jobs: aborting")
    if not self.server and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # Pad a private copy so that every job index is valid; the splitter
    # result in self.dict is left untouched.
    jobParams = list(self.dict['args'])
    jobParams.extend([""] * (njobs - len(jobParams)))

    # The site mapper does not depend on the job: build it once.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        listID.append(jobNumber)
        params = jobParams[index]
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for position, value in enumerate(params):
                argu[self.dict['params'][position]] = value
            if len(params) == 1:
                self.NumEvents = params[0]
            # just for debug: keep the job number apart from its arguments
            str_argu += ' ' + ' '.join([str(value) for value in params])
            listDictions.append(argu)
        job_ToSave = {}
        job_ToSave['arguments'] = str(jobNumber)
        job_ToSave['dlsDestination'] = self.jobDestination[index]
        listField.append(job_ToSave)
        msg = "Job %s Arguments: %s\n"%(str(jobNumber), str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)
    # write xml
    if len(listDictions):
        if exist == False:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
438 ewv 1.313
439 spiga 1.320 # def zipXMLfile(self):
440 ewv 1.313
441 spiga 1.320 # import tarfile
442     # try:
443     # tar = tarfile.open(self.tarNameWithPath, "a")
444     # tar.add(self.argsFile, os.path.basename(self.argsFile))
445     # tar.close()
446     # except IOError, exc:
447     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
448     # msg += str(exc)
449     # raise CrabException(msg)
450     # except tarfile.TarError, exc:
451     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
452     # msg += str(exc)
453     # raise CrabException(msg)
454    
def CreateXML(self):
    """
    Create the arguments file self.argsFile with an empty root node
    named self.rootArgsFilename.
    """
    result = IMProvNode(self.rootArgsFilename)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed before it is re-read
        outfile.close()
    return
461 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add one 'Job' entry to the xml file self.argsFile for every
    dictionary of listDictions; the dictionary items become the
    keyword arguments of the new node
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # do not leave the rewritten file open
        outfile.close()
    return
477 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitting result stored in self.dict.
    """
    splitting = self.dict
    return splitting['njobs']
480 gutsche 1.3
def getTarBall(self, exe):
    """
    Return the path of the tarball with lib and exe, building it with
    buildTar_ when the file does not exist yet.
    """
    tarball = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
493 slacapra 1.1
def buildTar_(self, executable):
    """
    Build the gzipped input sandbox self.tgzNameWithPath.

    Nothing is built when the working area is the release top.  The
    tarball receives the private executable (if any), the lib and module
    directories, every 'data' directory below src, the ParameterSet,
    crab.cfg (best effort), the needed ProdCommon/IMProv/WMCore pieces,
    the CRAB helper scripts and the additional input files.

    Raises CrabException when the executable is not found, when the
    tarball cannot be written or when it exceeds self.MaxTarBallSize MB.
    """
    import sys
    import tarfile

    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    try:  # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
                    path = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(path) >= 0:
                        exe = exeWithPath.replace(path, '')
                        tar.add(path+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            tar.dereference = True
            libDir = 'lib'
            lib = swArea+'/' + libDir
            common.logger.debug("lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)
            tar.dereference = False

            ## Now check if any data dir(s) is present
            self.dataExist = False
            srcDir = swArea+"/src"
            todo_list = [(i, i) for i in os.listdir(srcDir)]
            while len(todo_list):
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(srcDir+"/"+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(srcDir+"/"+entry)]
                    if name == 'data':
                        self.dataExist = True
                        common.logger.debug("data "+entry+" to be tarred")
                        tar.add(srcDir+"/"+entry, "src/"+entry)

            ### CMSSW ParameterSet
            if not self.pset is None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file, self.configFilename())

            # crab.cfg is shipped on a best-effort basis only
            try:
                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
                tar.add(crab_cfg_file, 'crab.cfg')
            except Exception:
                common.logger.debug("crab.cfg not added to "+self.tgzNameWithPath)

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py', 'ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools',
                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage',
                           'WMCore/__init__.py', 'WMCore/Algorithms']
            for needed in neededStuff:
                tar.add(prodcommonPath+needed, prodcommonDir+needed)

            ##### ML stuff
            ML_file_list = ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            path = os.environ['CRABDIR'] + '/python/'
            for mlFile in ML_file_list:
                tar.add(path+mlFile, mlFile)

            ##### Utils
            Utils_file_list = ['parseCrabFjr.py', 'writeCfg.py', 'fillCrabFjr.py', 'cmscp.py']
            for utilFile in Utils_file_list:
                tar.add(path+utilFile, utilFile)

            ##### AdditionalFiles
            tar.dereference = True
            for inboxFile in self.additional_inbox_files:
                tar.add(inboxFile, inboxFile.split('/')[-1])
            tar.dereference = False
            common.logger.log(10-1, "Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # release the file handle also when a step above fails
            tar.close()
    except (IOError, OSError, tarfile.TarError):
        # a missing path makes tar.add/os.listdir raise OSError, which
        # must be reported as a CRAB error like the other failures
        exc = sys.exc_info()[1]
        msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
        msg += str(exc)
        raise CrabException(msg)

    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
              + 'MB input sandbox limit \n'
        msg += ' and not supported by the direct GRID submission system.\n'
        msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
        msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
        raise CrabException(msg)
618 gutsche 1.72
619 slacapra 1.61 ## create tar-ball with ML stuff
620 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The ParameterSet is renamed to pset.py for CMSSW 2_1 and later and
    to pset.cfg for older releases.
    """
    # FUTURE: Drop support for .cfg when possible
    # Compare numbers, not text: the version fields may be stored as
    # strings, and a string never compares sensibly with an integer.
    release = (int(self.CMSSW_major), int(self.CMSSW_minor))
    if release >= (2, 1):
        psetName = 'pset.py'
    else:
        psetName = 'pset.cfg'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    # SGE and ARC reuse the LCG environment setup
    txt += 'elif [ $middleware == SGE ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'elif [ $middleware == ARC ]; then\n'
    txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
    txt += scram+' project CMSSW '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd '+self.version+'\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    txt += ' echo "eval \\`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath='+self.datasetPath+'\n'

        txt += 'PrimaryDataset='+self.primaryDataset + '\n'
        txt += 'DataTier='+self.dataTier+'\n'
        txt += 'ApplicationFamily=cmsRun\n'

    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'
    if self.pset != None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
    else:
        # script-only job: pass the user arguments and the event count
        txt += '\n'
        if self.AdditionalArgs:
            txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
        if int(self.NumEvents) != 0:
            txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
    return txt
726 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Put in the script the commands to unpack the user tarball and to
    add $RUNTIME_AREA to PYTHONPATH.

    nj is not used by this method.

    Returns the shell fragment as a string. When the tarball
    (self.tgzNameWithPath) does not exist on the submitting side, only
    the header comment line is returned.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+tarball+' :" \n'
        txt += 'tar zxvf $RUNTIME_AREA/'+tarball+'\n'
        # Record the exit status of tar right away: any command emitted
        # between tar and this line (such as the debug listing below)
        # would overwrite $? and hide an untar failure.
        txt += 'untar_status=$? \n'
        if self.debug_wrapper == 1:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += 'fi\n'
        txt += '\n'

    return txt
761 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands that move the shipped CMSSW
    directories (lib, module and, when self.dataExist is set, src) and
    the additional inbox files from $RUNTIME_AREA into the current
    directory, add $RUNTIME_AREA to PYTHONPATH and, when a pset is
    configured, compute PSETHASH with edmConfigHash.

    nj is not used by this method. Returns the shell fragment as a string.
    """

    pieces = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]
    if self.dataExist == True:
        pieces.append('rm -r src/ \n')
        pieces.append('mv $RUNTIME_AREA/src/ . \n')
    for inbox_file in self.additional_inbox_files:
        pieces.append('mv $RUNTIME_AREA/'+os.path.basename(inbox_file)+' . \n')

    pieces.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset is not None:
        # FUTURE: Drop support for .cfg when possible
        # Python configuration files are used from CMSSW 2_1 onwards.
        python_config = (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3)
        if python_config:
            psetName = 'pset.py'
        else:
            psetName = 'pset.cfg'

        pieces.append('\n')
        if self.debug_wrapper == 1:
            # Debug only: dump the configuration and show which
            # edmConfigHash executable is picked up.
            pieces.extend([
                'echo "***** cat ' + psetName + ' *********"\n',
                'cat ' + psetName + '\n',
                'echo "****** end ' + psetName + ' ********"\n',
                '\n',
                'echo "***********************" \n',
                'which edmConfigHash \n',
                'echo "***********************" \n',
            ])
        if python_config:
            pieces.append('edmConfigHash ' + psetName + ' \n')
            pieces.append('PSETHASH=`edmConfigHash ' + psetName + '` \n')
        else:
            # Old-style .cfg files are fed to edmConfigHash on stdin.
            pieces.append('PSETHASH=`edmConfigHash < ' + psetName + '` \n')
        pieces.append('echo "PSETHASH = $PSETHASH" \n')
        #### FEDE temporary fix for noEdm files #####
        pieces.extend([
            'if [ -z "$PSETHASH" ]; then \n',
            ' export PSETHASH=null\n',
            'fi \n',
        ])
        #############################################
        pieces.append('\n')

    return ''.join(pieces)
821 slacapra 1.1
822 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is configured, self.executable otherwise.
    """
    return "sh " if self.scriptExe else self.executable
828 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable.

    With a user script the arguments are the script itself followed by
    $NJob and $AdditionalArgs; otherwise they are the framework job
    report option and the configuration file option.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:
        return self.scriptExe + " $NJob $AdditionalArgs"

    # Type of config file depends on CMSSW version
    # NOTE(review): the switch happens on major >= 2 here, while
    # configFilename and wsBuildExe require 2_1 or later -- confirm
    # whether 2_0_x is meant to use pset.py.
    if self.CMSSW_major >= 2:
        config_option = " -p pset.py"
    else:
        config_option = " -p pset.cfg"
    return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml" + config_option
842 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The tarball and the arguments file are included only when they
    exist on disk; the wrapper script from the job directory is always
    included. nj is not used by this method.
    """
    optional_files = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional_files if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
854    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Each user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile together
    with the 1-based job number nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
866    
867    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every file in self.output_file the script checks that the file
    exists, renames it to its per-job name (numberFile(name, '$NJob'))
    and links it back under $RUNTIME_AREA with the original name. A
    missing file sets job_exit_code=60302; with the CONDOR_G scheduler
    a dummy output file is additionally prepared on OSG middleware.
    The script finally defines file_list (comma separated per-job names
    under $SOFTWARE_DIR) and changes to $RUNTIME_AREA.

    nj is not used by this method. Returns the shell fragment as a string.
    """

    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'

    file_list = []
    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$NJob')
        file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$NJob'))
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
            txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        else:
            txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
            txt += ' fi \n'
        txt += 'fi\n'

    # str.join replaces the deprecated string.join module function,
    # which no longer exists in Python 3.
    txt += 'file_list="'+','.join(file_list)+'"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
914    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The expression requires the CMSSW version tag and the architecture
    tag (each only when set) in the site software run-time environment,
    outbound IP connectivity and, for the glite and glitecoll
    schedulers, a CE in "Production" state ("Special" is accepted too
    when GRID.use_cream is set in the configuration).

    nj is not used by this method. Returns the expression as a string.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the clauses and join them at the end, so that the
    # expression never starts with a dangling ' && ' when self.version
    # is empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ("glitecoll", "glite"):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
938 gutsche 1.3
def configFilename(self):
    """ return the config filename: self.name() plus '.py' from CMSSW 2_1 onwards, '.cfg' before """
    # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
    uses_python = (self.CMSSW_major, self.CMSSW_minor) >= (2, 1)
    extension = '.py' if uses_python else '.cfg'
    return self.name() + extension
946 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on OSG: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for the requested version,
    leaving through func_exit with job_exit_code=10020 when that file
    is missing.
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script_lines)
970 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS execution
    environment on LCG: it exports SCRAM_ARCH and BUILD_ARCH and
    sources $VO_CMS_SW_DIR/cmsset_default.sh. The script leaves through
    func_exit with job_exit_code 10031 (software dir not set), 10020
    (cmsset_default.sh missing or empty) or 10032 (sourcing failed).
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script_lines)
1004 gutsche 1.5
def wsModifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    Nothing is generated (empty string) unless self.copy_data is 1.
    Otherwise the script chooses FOR_LFN from the stage out status,
    runs ModifyJobReport.py on crab_fjr_$NJob.xml and, on success,
    replaces the report with NewFrameworkJobReport.xml; a failure sets
    job_exit_code to 70500. When self.publish_data is 1 the processed
    dataset name is passed to ModifyJobReport.py as well.

    nj is not used by this method.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH'

    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modifier + ' ' + args + '"\n',
        modifier + ' ' + args + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1050 ewv 1.283
def wsParseFJR(self):
    """
    Parse the FrameworkJobReport to obtain useful infos

    Returns the shell fragment (a string) which:
      - when crab_fjr_$NJob.xml and parseCrabFjr.py are both present
        and non-empty, runs the parser once with the dashboard options
        (plus self.debugWrap) and once with --exitcode to fill
        $executable_exit_status;
      - otherwise only reports that the exit code of the executable
        from the command line is used;
      - leaves through func_exit when the status is non-zero and is not
        one of 50115, 50117 or 30001;
      - records the final status in $RUNTIME_AREA/$repo and in
        job_exit_code.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
    if self.debug_wrapper == 1:
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    txt += ' if [ $executable_exit_status -eq 0 ];then\n'
    txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
    # The verification of the list of processed input files (exit code
    # 30001) used to be emitted here. It was disabled because it can no
    # longer work with the current layout of the job arguments.
    txt += ' fi\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
    txt += ' echo ">>> Executable failed $executable_exit_status"\n'
    txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += ' job_exit_code=$executable_exit_status\n'
    txt += ' func_exit\n'
    txt += 'fi\n\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1123    
def setParam_(self, param, value):
    """ store value under the key param in the parameter dictionary self._params """
    params = self._params
    params[param] = value
1126    
def getParams(self):
    """ return the parameter dictionary self._params itself (not a copy) """
    params = self._params
    return params
1129 gutsche 1.8
def outList(self,list=False):
    """
    Declare the files expected in the output sandbox.

    A warning is logged when no output file has been defined.

    list -- when true, self.output_file is returned instead of the
            script fragment. (The name shadows the builtin; it is kept
            because callers may pass it by keyword.)

    Otherwise returns the shell fragment (a string) that echoes the
    expected files and exports them, space separated, as filesToCheck.
    The files are the per-job names of self.output_file_sandbox,
    preceded by those of self.output_file when self.return_data is 1,
    followed by the job stdout and stderr.
    """
    if len(self.output_file) <= 0:
        msg ="WARNING: no output files name have been defined!!\n"
        msg+="\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    # Nothing of the script fragment is needed when only the list of
    # declared output files is requested.
    if list:
        return self.output_file

    if self.return_data == 1:
        declared = self.output_file + self.output_file_sandbox
    else:
        declared = self.output_file_sandbox
    listOutFiles = [numberFile(name, '$NJob') for name in declared]
    listOutFiles.append('CMSSW_$NJob.stdout')
    listOutFiles.append('CMSSW_$NJob.stderr')

    # str.join replaces the deprecated string.join module function,
    # which no longer exists in Python 3.
    joined = ' '.join(listOutFiles)
    txt = ''
    txt += 'echo ">>> list of expected files on output sandbox"\n'
    txt += 'echo "output files: '+joined+'"\n'
    txt += 'filesToCheck="'+joined+'"\n'
    txt += 'export filesToCheck\n'
    return txt