ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.327
Committed: Thu Jul 30 18:45:44 2009 UTC (15 years, 9 months ago) by ewv
Content type: text/x-python
Branch: MAIN
Changes since 1.326: +9 -0 lines
Log Message:
Add config parameters for lumi-based splitting

File Contents

# User Rev Content
1 ewv 1.327
2     __revision__ = "$Id: writeCfg.py,v 1.22 2009/07/29 21:20:03 ewv Exp $"
3     __version__ = "$Revision: 1.22 $"
4    
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure a CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' names to configuration values
    ncjobs      -- number of jobs requested to be created (stored as-is)
    skip_blocks -- stored and later handed to DataDiscovery
    isNew       -- when true, the parameter set is modified and the
                   input sandbox tarball is prepared

    Raises CrabException on an unparsable/unsupported CMSSW version,
    missing dataset path or PSet, missing script or additional input
    files, and on publication names that are too long.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    size = 9.5
    if self.server or common.scheduler.name().upper() in ['LSF', 'CAF']:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_' separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 1 or (self.CMSSW_major == 1 and self.CMSSW_minor < 5):
        msg = "CRAB supports CMSSW >= 1_5_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   1.X dropped: drop support for running .cfg on WN
    #   2.0 dropped: drop all support for cfg here and in writeCfg
    #   2.0 dropped: Recheck the random number seed support

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        self.primaryDataset = self.datasetPath.split("/")[1]
        self.dataTier = self.datasetPath.split("/")[2]

    # Analysis dataset is primary/processed/tier/definition.
    # The path starts with '/', so split("/") yields a leading empty field:
    # a plain /primary/processed/tier path has 4 fields and an analysis
    # dataset has 5.  There is nothing to split when no input is selected.
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    psetName = cfg_params.get('CMSSW.pset')
    if psetName is None:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = psetName
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # for "nothing to run" with truthiness.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    addFiles = cfg_params.get('USER.additional_input_files')
    if addFiles is not None:
        for pattern in addFiles.split(','):
            pattern = pattern.strip()
            if not pattern:
                # tolerate empty entries such as a trailing comma
                continue
            dirname = ''
            if not pattern[0] == "/":
                dirname = "."
            files = []
            if pattern.find("*") > -1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files.append(pattern)
            for fileName in files:
                if not os.path.exists(fileName):
                    raise CrabException("Additional input file not found: "+fileName)
                self.additional_inbox_files.append(fileName.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    self.incrementSeeds = []
    self.preserveSeeds = []
    preserve = cfg_params.get('CMSSW.preserve_seeds')
    if preserve is not None:
        # strings are immutable: keep the stripped value
        self.preserveSeeds = [name.strip() for name in preserve.split(',')]
    increment = cfg_params.get('CMSSW.increment_seeds')
    if increment is not None:
        self.incrementSeeds = [name.strip() for name in increment.split(',')]

    self.firstRun = cfg_params.get('CMSSW.first_run', None)

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        publishName = cfg_params.get('USER.publish_data_name')
        if publishName is None:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = publishName
        #### check of length of datasetname to publish ####
        common.logger.debug("test 100 char limit on datasetname")
        user = getUserName()
        common.logger.debug("user = " + user)
        len_user_name = len(user)
        common.logger.debug("len_user_name = " + str(len_user_name))
        len_processedDataset = len(self.processedDataset)
        common.logger.debug("processedDataset " + self.processedDataset)
        common.logger.debug("len_processedDataset = " + str(len_processedDataset))
        if self.datasetPath is not None:
            len_primary = len(self.primaryDataset)
            common.logger.debug("primaryDataset = " + self.primaryDataset)
            common.logger.debug("len_primary = " + str(len_primary))
            limit = 59 - len_user_name - len_primary
        else:
            # integer (floor) division, as '/' on ints in Python 2
            limit = (59 - len_user_name) // 2
        if len_processedDataset > limit:
            raise CrabException("Warning: publication name too long. USER.publish_data_name has to be < " + str(limit) + " characters")

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end
    self.conf['blockSites'] = blockSites

    ## Select Splitting
    splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

    if self.selectNoInput:
        if self.pset is None:
            self.algo = 'ForScript'
        else:
            self.algo = 'NoInput'
            self.conf['managedGenerators'] = self.managedGenerators
            self.conf['generator'] = self.generator
    elif self.ads:
        self.algo = 'LumiBased'
    elif splitByRun == 1:
        self.algo = 'RunBased'
    else:
        self.algo = 'EventBased'
    common.logger.debug("Job splitting method: %s" % self.algo)

    splitter = JobSplitter(self.cfg_params, self.conf)
    self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
275 spiga 1.293
276    
def ModifyPset(self):
    """
    Rewrite the user parameter set and collect its output files.

    Sets maxEvent/skipEvent placeholders, writes the parameter set to
    self.configFilename(), and adds TFileService / PoolOutputModule files
    to self.output_file according to the CMSSW.* switches.  Any
    CrabException raised on the way is logged and replaced by a generic
    "Error while manipulating ParameterSet" CrabException.
    """
    import PsetManipulator as pp
    editor = pp.PsetManipulator(self.pset)
    try:
        # Add FrameworkJobReport to parameter-set, set max events.
        # Reset later for data jobs by writeCFG which does all modifications
        editor.maxEvent(1)
        editor.skipEvent(0)
        editor.psetWriter(self.configFilename())

        ## If present, add TFileService to output files
        skipTFS = int(self.cfg_params.get('CMSSW.skip_TFileService_output',0))
        if not skipTFS:
            tfsOutput = editor.getTFileService()
            if tfsOutput and tfsOutput in self.output_file:
                common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
            elif tfsOutput:
                self.output_file.append(tfsOutput)
                common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")

        # If present and requested, add PoolOutputModule to output files
        edmOutput = editor.getPoolOutputModule()
        wantEdm = int(self.cfg_params.get('CMSSW.get_edm_output',0))
        if wantEdm and edmOutput:
            for edmFile in edmOutput:
                if edmFile in self.output_file:
                    common.logger.debug("Output from PoolOutputModule "+edmFile+" already in output files")
                    continue
                self.output_file.append(edmFile)
                common.logger.info("Adding "+edmFile+" (from PoolOutputModule) to list of output files")
        elif not wantEdm and edmOutput:
            # not requested, check anyhow to avoid accidental T2 overload
            missedFiles = [f for f in edmOutput if f not in self.output_file]
            if missedFiles:
                msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
                msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
                msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
                if not int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
                    raise CrabException(msg)
                msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
                common.logger.info(msg)

        if editor.getBadFilesSetting():
            msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
            common.logger.info(msg)

    except CrabException as err:
        common.logger.info(str(err))
        msg = 'Error while manipulating ParameterSet (see previous message, if any): exiting...'
        raise CrabException(msg)
333    
334 gutsche 1.3
def DataDiscoveryAndLocation(self, cfg_params):
    """
    Query data discovery (DBS) and data location (DLS) for self.datasetPath.

    Stores the discovery object in self.pubdata and self.conf['pubdata'],
    the files-by-block mapping in self.filesbyblock and the event count in
    self.maxEvents.

    Returns a mapping with one entry per block of self.filesbyblock; the
    value is what the location service reported for that block, or an
    empty list when it reported nothing.

    Raises CrabException when discovery or location fails, or when there
    is no block at all.
    """
    import DataDiscovery
    import DataLocation
    common.logger.log(10-1, "CMSSW::DataDiscoveryAndLocation()")

    datasetPath = self.datasetPath

    ## Contact the DBS
    common.logger.info("Contacting Data Discovery Services ...")
    try:
        self.pubdata = DataDiscovery.DataDiscovery(datasetPath, cfg_params, self.skip_blocks)
        self.pubdata.fetchDBSInfo()
    except (DataDiscovery.NotExistingDatasetError,
            DataDiscovery.NoDataTierinProvenanceError,
            DataDiscovery.DataDiscoveryError) as ex:
        # all three failures are reported to the user in the same way
        msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
        raise CrabException(msg)

    self.filesbyblock = self.pubdata.getFiles()
    self.conf['pubdata'] = self.pubdata

    ## get max number of events
    self.maxEvents = self.pubdata.getMaxEvents()

    ## Contact the DLS and build a list of sites hosting the fileblocks
    try:
        dataloc = DataLocation.DataLocation(self.filesbyblock.keys(), cfg_params)
        dataloc.fetchDLSInfo()
    except DataLocation.DataLocationError as ex:
        msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
        raise CrabException(msg)

    # one entry per known block, even if no site hosts it
    unsorted_sites = dataloc.getSites()
    sites = self.filesbyblock.fromkeys(self.filesbyblock, '')
    for block in self.filesbyblock.keys():
        if unsorted_sites.has_key(block):
            sites[block] = unsorted_sites[block]
        else:
            sites[block] = []

    if len(sites) == 0:
        msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
        msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
        msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
        raise CrabException(msg)

    # screen output
    common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")

    return sites
402 ewv 1.131
403 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Record the per-job arguments and destinations computed by the splitter.

    jobParams  -- ignored: it is replaced by self.dict['args']
    firstJobID -- offset added to the 0-based job index; job numbers
                  written out are offset + index + 1

    Writes one 'Job' entry per job with arguments to the arguments XML
    file (creating the file if it did not exist when split started) and
    updates the job database through common._db.updateJob_.

    Raises CrabException when there are no jobs, or more than 500 jobs
    without server mode.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Ask to split "+str(njobs)+" jobs: aborting")
    if not self.server and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    for i in range(njobs):
        jobParams.append("")

    # Loop invariant: njobs >= 1 here, so this runs exactly once instead
    # of once per job.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    concString = ' '
    for idx in range(njobs):
        job = idx + int(firstJobID)
        listID.append(job+1)
        args = jobParams[idx]
        str_argu = str(job+1)
        if len(args):
            argu = {'JobID': job+1}
            for i in range(len(args)):
                argu[self.dict['params'][i]] = args[i]
            # a single argument is remembered as the number of events
            if len(args) == 1:
                self.NumEvents = args[0]
            # just for debug
            str_argu += concString.join(args)
            listDictions.append(argu)
        job_ToSave = {}
        job_ToSave['arguments'] = str(job+1)
        job_ToSave['dlsDestination'] = self.jobDestination[idx]
        listField.append(job_ToSave)
        msg = "Job %s Arguments: %s\n"%(str(job+1),str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[idx]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[idx]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)
    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
455 ewv 1.313
456 spiga 1.320 # def zipXMLfile(self):
457 ewv 1.313
458 spiga 1.320 # import tarfile
459     # try:
460     # tar = tarfile.open(self.tarNameWithPath, "a")
461     # tar.add(self.argsFile, os.path.basename(self.argsFile))
462     # tar.close()
463     # except IOError, exc:
464     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
465     # msg += str(exc)
466     # raise CrabException(msg)
467     # except tarfile.TarError, exc:
468     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
469     # msg += str(exc)
470     # raise CrabException(msg)
471 ewv 1.325
def CreateXML(self):
    """
    Create the arguments XML file (self.argsFile) holding only the root
    node named self.rootArgsFilename, overwriting any existing file.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed before addEntry reads it
        outfile.close()
    return
478 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary in listDictions to the arguments
    XML file (self.argsFile); each dictionary is passed to the node as
    keyword arguments.  The file is loaded, extended and rewritten.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # do not rely on garbage collection to flush and close the file
        outfile.close()
    return
494 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitter result held in self.dict.
    """
    splitterResult = self.dict
    return splitterResult['njobs']
497 gutsche 1.3
def getTarBall(self, exe):
    """
    Return the path of the TarBall with lib and exe.

    The path is recomputed from the work space and self.tgz_name and
    stored in self.tgzNameWithPath.  An already existing file is returned
    as-is; otherwise buildTar_(exe) is called first.
    """
    tarball = common.work_space.pathForTgz()+self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
510 slacapra 1.1
def buildTar_(self, executable):
    """
    Build the gzipped input sandbox at self.tgzNameWithPath.

    The archive receives: the user executable when it is not found under
    the release top, the lib and module directories of the user area,
    every 'data' directory below src, the job parameter set, crab.cfg
    (best effort), the ProdCommon/IMProv/WMCore pieces, the monitoring
    and utility scripts, and the additional input files.

    Nothing is built when the working area is the release top.

    Raises CrabException when the executable is not found, the tarball
    cannot be written, or it exceeds self.MaxTarBallSize megabytes.
    """
    # First of all declare the user Scram area
    swArea = self.scram.getSWArea_()
    swReleaseTop = self.scram.getReleaseTop_()

    ## check if working area is release top
    if swReleaseTop == '' or swArea == swReleaseTop:
        common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
        return

    import tarfile
    try: # create tar ball
        tar = tarfile.open(self.tgzNameWithPath, "w:gz")
        try:
            ## First find the executable
            if self.executable != '':
                exeWithPath = self.scram.findFile_(executable)
                if not exeWithPath:
                    raise CrabException('User executable '+executable+' not found')

                ## then check if it's private or not
                if exeWithPath.find(swReleaseTop) == -1:
                    # the exe is private, so we must ship
                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
                    areaPrefix = swArea+'/'
                    # distinguish case when script is in user project area or given by full path somewhere else
                    if exeWithPath.find(areaPrefix) >= 0:
                        exe = exeWithPath.replace(areaPrefix, '')
                        tar.add(areaPrefix+exe, exe)
                    else:
                        tar.add(exeWithPath, os.path.basename(executable))
                # otherwise the exe is from release, we'll find it on WN

            ## Now get the libraries: only those in local working area
            tar.dereference = True
            libDir = 'lib'
            lib = swArea+'/' +libDir
            common.logger.debug("lib "+lib+" to be tarred")
            if os.path.exists(lib):
                tar.add(lib, libDir)

            ## Now check if module dir is present
            moduleDir = 'module'
            module = swArea + '/' + moduleDir
            if os.path.isdir(module):
                tar.add(module, moduleDir)
            tar.dereference = False

            ## Now check if any data dir(s) is present
            self.dataExist = False
            srcDir = swArea+"/src/"
            todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
            while len(todo_list):
                entry, name = todo_list.pop()
                if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
                    continue
                if os.path.isdir(srcDir+entry):
                    entryPath = entry + '/'
                    todo_list += [(entryPath + i, i) for i in os.listdir(srcDir+entry)]
                    if name == 'data':
                        self.dataExist = True
                        common.logger.debug("data "+entry+" to be tarred")
                        tar.add(srcDir+entry, "src/"+entry)

            ### CMSSW ParameterSet
            if self.pset is not None:
                cfg_file = common.work_space.jobDir()+self.configFilename()
                tar.add(cfg_file, self.configFilename())

            # crab.cfg is shipped on a best-effort basis: a missing or
            # unreadable file must not abort the sandbox creation.
            try:
                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
                tar.add(crab_cfg_file, 'crab.cfg')
            except (IOError, OSError, tarfile.TarError) as exc:
                common.logger.debug("crab.cfg not added to the tarball: "+str(exc))

            ## Add ProdCommon dir to tar
            prodcommonDir = './'
            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
                           'WMCore/__init__.py','WMCore/Algorithms']
            for item in neededStuff:
                tar.add(prodcommonPath+item, prodcommonDir+item)

            ##### ML stuff
            ML_file_list = ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
            path = os.environ['CRABDIR'] + '/python/'
            for item in ML_file_list:
                tar.add(path+item, item)

            ##### Utils
            Utils_file_list = ['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
            for item in Utils_file_list:
                tar.add(path+item, item)

            ##### AdditionalFiles
            tar.dereference = True
            for item in self.additional_inbox_files:
                tar.add(item, item.split('/')[-1])
            tar.dereference = False
            common.logger.log(10-1, "Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
        finally:
            # always release the archive handle, also on error paths
            tar.close()
    except IOError as exc:
        msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
        msg += str(exc)
        raise CrabException(msg)
    except tarfile.TarError as exc:
        msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
        msg += str(exc)
        raise CrabException(msg)

    tarballinfo = os.stat(self.tgzNameWithPath)
    if tarballinfo.st_size > self.MaxTarBallSize*1024*1024:
        msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
              +'MB input sandbox limit \n'
        msg += ' and not supported by the direct GRID submission system.\n'
        msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
        msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServer#CRABSERVER_for_Users'
        raise CrabException(msg)
635 gutsche 1.72
636 slacapra 1.61 ## create tar-ball with ML stuff
637 slacapra 1.97
638 spiga 1.165 def wsSetupEnvironment(self, nj=0):
639 slacapra 1.1 """
640     Returns part of a job script which prepares
641     the execution environment for the job 'nj'.
642     """
643 ewv 1.276 # FUTURE: Drop support for .cfg when possible
644 ewv 1.184 if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
645     psetName = 'pset.py'
646     else:
647     psetName = 'pset.cfg'
648 slacapra 1.1 # Prepare JobType-independent part
649 ewv 1.160 txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
650 fanzago 1.133 txt += 'echo ">>> setup environment"\n'
651 spiga 1.290 txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
652 gutsche 1.3 txt += self.wsSetupCMSLCGEnvironment_()
653 ewv 1.283 txt += 'elif [ $middleware == OSG ]; then\n'
654 gutsche 1.43 txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
655 ewv 1.132 txt += ' if [ ! $? == 0 ] ;then\n'
656 fanzago 1.161 txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
657     txt += ' job_exit_code=10016\n'
658     txt += ' func_exit\n'
659 gutsche 1.3 txt += ' fi\n'
660 fanzago 1.133 txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
661 gutsche 1.3 txt += '\n'
662     txt += ' echo "Change to working directory: $WORKING_DIR"\n'
663     txt += ' cd $WORKING_DIR\n'
664 fanzago 1.133 txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
665 ewv 1.131 txt += self.wsSetupCMSOSGEnvironment_()
666 spiga 1.282 #Setup SGE Environment
667 ewv 1.283 txt += 'elif [ $middleware == SGE ]; then\n'
668 spiga 1.282 txt += self.wsSetupCMSLCGEnvironment_()
669    
670 edelmann 1.289 txt += 'elif [ $middleware == ARC ]; then\n'
671     txt += self.wsSetupCMSLCGEnvironment_()
672    
673 gutsche 1.3 txt += 'fi\n'
674 slacapra 1.1
675     # Prepare JobType-specific part
676     scram = self.scram.commandName()
677     txt += '\n\n'
678 fanzago 1.133 txt += 'echo ">>> specific cmssw setup environment:"\n'
679     txt += 'echo "CMSSW_VERSION = '+self.version+'"\n'
680 slacapra 1.1 txt += scram+' project CMSSW '+self.version+'\n'
681     txt += 'status=$?\n'
682     txt += 'if [ $status != 0 ] ; then\n'
683 fanzago 1.161 txt += ' echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
684     txt += ' job_exit_code=10034\n'
685 fanzago 1.163 txt += ' func_exit\n'
686 slacapra 1.1 txt += 'fi \n'
687     txt += 'cd '+self.version+'\n'
688 spiga 1.277 txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
689 fanzago 1.133 txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
690 slacapra 1.1 txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
691 fanzago 1.180 txt += 'if [ $? != 0 ] ; then\n'
692     txt += ' echo "ERROR ==> Problem with the command: "\n'
693     txt += ' echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
694     txt += ' job_exit_code=10034\n'
695     txt += ' func_exit\n'
696     txt += 'fi \n'
697 slacapra 1.1 # Handle the arguments:
698     txt += "\n"
699 gutsche 1.7 txt += "## number of arguments (first argument always jobnumber)\n"
700 slacapra 1.1 txt += "\n"
701 spiga 1.165 txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
702 slacapra 1.1 txt += "then\n"
703 fanzago 1.161 txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
704     txt += ' job_exit_code=50113\n'
705     txt += " func_exit\n"
706 slacapra 1.1 txt += "fi\n"
707     txt += "\n"
708    
709     # Prepare job-specific part
710     job = common.job_list[nj]
711 ewv 1.131 if (self.datasetPath):
712 fanzago 1.318 #self.primaryDataset = self.datasetPath.split("/")[1]
713     #DataTier = self.datasetPath.split("/")[2]
714 fanzago 1.93 txt += '\n'
715     txt += 'DatasetPath='+self.datasetPath+'\n'
716    
717 spiga 1.238 txt += 'PrimaryDataset='+self.primaryDataset +'\n'
718 fanzago 1.318 txt += 'DataTier='+self.dataTier+'\n'
719 fanzago 1.96 txt += 'ApplicationFamily=cmsRun\n'
720 fanzago 1.93
721     else:
722 fanzago 1.318 #self.primaryDataset = 'null'
723 fanzago 1.93 txt += 'DatasetPath=MCDataTier\n'
724     txt += 'PrimaryDataset=null\n'
725     txt += 'DataTier=null\n'
726     txt += 'ApplicationFamily=MCDataTier\n'
727 ewv 1.170 if self.pset != None:
728 spiga 1.42 pset = os.path.basename(job.configFilename())
729     txt += '\n'
730 spiga 1.95 txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
731 spiga 1.296
732 ewv 1.295 txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
733     txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
734     txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
735     txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
736 slacapra 1.90
737 ewv 1.184 txt += 'mv -f ' + pset + ' ' + psetName + '\n'
738 ewv 1.319 else:
739 spiga 1.314 txt += '\n'
740 spiga 1.315 if self.AdditionalArgs: txt += 'export AdditionalArgs=%s\n'%(self.AdditionalArgs)
741     if int(self.NumEvents) != 0: txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
742 gutsche 1.3 return txt
743 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the job-script fragment that unpacks the user tarball.

    When the file named by self.tgzNameWithPath exists, the fragment
    untars its basename from $RUNTIME_AREA, calls func_exit if tar
    failed, and prepends $RUNTIME_AREA to PYTHONPATH.  When the file does
    not exist only the header comment is returned.

    nj is accepted for interface compatibility and is not used.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
    txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
    # Record the exit status immediately after tar.  "$?" always refers to
    # the most recent command, so emitting the debug "ls" first would make
    # the check below test ls instead of tar.
    txt += 'untar_status=$? \n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
778 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the job-script fragment that installs the shipped software area.

    The fragment replaces lib/ and module/ (and src/ when self.dataExist
    is True) with the copies found in $RUNTIME_AREA, moves every file in
    self.additional_inbox_files into the current directory and prepends
    $RUNTIME_AREA to PYTHONPATH.  When self.pset is not None it also emits
    the commands that compute PSETHASH with edmConfigHash.

    nj is accepted for interface compatibility and is not used.
    """
    txt = '\n#Written by cms_cmssw::wsBuildExe\n'
    txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'

    txt += 'rm -r lib/ module/ \n'
    txt += 'mv $RUNTIME_AREA/lib/ . \n'
    txt += 'mv $RUNTIME_AREA/module/ . \n'
    # Explicit comparison kept on purpose: only True (or 1) enables it.
    if self.dataExist == True:
        txt += 'rm -r src/ \n'
        txt += 'mv $RUNTIME_AREA/src/ . \n'
    for inbox_file in self.additional_inbox_files:
        txt += 'mv $RUNTIME_AREA/' + os.path.basename(inbox_file) + ' . \n'

    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    if self.pset is None:
        return txt

    # FUTURE: Drop support for .cfg when possible
    # Python configuration is used from CMSSW 2_1 onwards; evaluate the
    # version test once and reuse it for both the file name and the
    # edmConfigHash invocation style.
    python_config = ((self.CMSSW_major >= 2 and self.CMSSW_minor >= 1)
                     or self.CMSSW_major >= 3)
    if python_config:
        psetName = 'pset.py'
    else:
        psetName = 'pset.cfg'

    txt += '\n'
    if self.debug_wrapper == 1:
        txt += 'echo "***** cat ' + psetName + ' *********"\n'
        txt += 'cat ' + psetName + '\n'
        txt += 'echo "****** end ' + psetName + ' ********"\n'
        txt += '\n'
        txt += 'echo "***********************" \n'
        txt += 'which edmConfigHash \n'
        txt += 'echo "***********************" \n'
    if python_config:
        txt += 'edmConfigHash ' + psetName + ' \n'
        txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
    else:
        # .cfg files are fed to edmConfigHash on standard input
        txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
    txt += 'echo "PSETHASH = $PSETHASH" \n'
    #### FEDE temporary fix for noEdm files #####
    txt += 'if [ -z "$PSETHASH" ]; then \n'
    txt += ' export PSETHASH=null\n'
    txt += 'fi \n'
    #############################################
    txt += '\n'
    return txt
838 slacapra 1.1
839 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job payload.

    A user script (self.scriptExe set) is launched through "sh ";
    otherwise self.executable is returned.
    """
    return "sh " if self.scriptExe else self.executable
845 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the job payload.

    With a user script (self.scriptExe set) the arguments are the script
    itself followed by $NJob and $AdditionalArgs.  Otherwise they are the
    framework job report option and the parameter-set file.
    """
    # FUTURE: This function tests the CMSSW version. Can be simplified as we drop support for old versions
    if self.scriptExe:
        return self.scriptExe + " $NJob $AdditionalArgs"

    ex_args = " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
    # Type of config file depends on CMSSW version.  Use the same 2_1
    # threshold as wsBuildExe and configFilename, so that 2_0_x jobs are
    # pointed at pset.cfg rather than at a pset.py the rest of this class
    # never refers to for that release.
    if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
        ex_args += " -p pset.py"
    else:
        ex_args += " -p pset.cfg"
    return ex_args
859 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL input sandbox.

    The tarball and the arguments file are included only when they exist
    on disk; the job script path (job directory + self.scriptName) is
    always appended last.  nj is not used.
    """
    optional_files = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional_files if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
871    
def outputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL output sandbox.

    Every user-declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile together with
    the job number nj + 1 rendered as a string.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
883    
884    
def wsRenameOutput(self, nj):
    """
    Return the job-script fragment that renames the produced files.

    For each name in self.output_file the fragment checks that the file
    exists, moves it to the name returned by numberFile(name, '$NJob')
    and leaves a symlink under the original name in $RUNTIME_AREA.  With
    self.copy_data == 1 the renamed file stays in the current directory,
    otherwise it is moved into $RUNTIME_AREA.  A missing file sets
    job_exit_code=60302.  The fragment finally exports the comma separated
    file_list variable and changes directory to $RUNTIME_AREA.

    nj is not used; the job number is resolved at run time through $NJob.
    """
    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./' + fileWithSuffix + ' ] ; then\n'
        if self.copy_data == 1:  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv ' + fileWithSuffix + ' ' + output_file_num + '\n'
            txt += ' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        else:
            txt += ' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [numberFile('$SOFTWARE_DIR/' + fileWithSuffix, '$NJob')
                 for fileWithSuffix in self.output_file]

    # str.join replaces the deprecated string.join module function and
    # matches the ','.join(...) calls used elsewhere in this class.
    txt += 'file_list="' + ','.join(file_list) + '"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
931    
def getRequirements(self, nj=None):
    """
    Return the job requirements expression to add to jdl files.

    The expression is the ' && ' conjunction of: a software-tag clause for
    self.version (when set), one for self.executable_arch (when set), the
    outbound-IP clause, and - for the glite/glitecoll schedulers only - a
    GlueCEStateStatus clause whose form depends on the GRID.use_cream
    configuration parameter.

    nj is accepted for interface compatibility and is not used; its
    default is None rather than a shared mutable list.
    """
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' +
                       self.version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' +
                       self.executable_arch +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    if common.scheduler.name() in ("glitecoll", "glite"):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            clauses.append('other.GlueCEStateStatus == "Production" ')

    # Joining the collected clauses avoids a dangling leading ' && ' when
    # self.version is empty.
    return ' && '.join(clauses)
955 gutsche 1.3
def configFilename(self):
    """
    Return the config filename: self.name() plus '.py' for CMSSW 2_1 and
    later, '.cfg' for older releases.
    """
    # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
    uses_python = self.CMSSW_major >= 3 or (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1)
    extension = '.py' if uses_python else '.cfg'
    return self.name() + extension
963 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the job-script fragment that prepares the CMS environment on
    OSG: it exports SCRAM_ARCH from self.executable_arch and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version, calling
    func_exit with job_exit_code=10020 when that file is missing.
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script_lines)
987 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the job-script fragment that prepares the CMS environment on
    LCG: it exports SCRAM_ARCH and BUILD_ARCH from self.executable_arch
    and sources $VO_CMS_SW_DIR/cmsset_default.sh.  The fragment calls
    func_exit with job_exit_code 10031 (software dir variable unset),
    10020 (setup file missing or empty) or 10032 (sourcing failed).
    """
    arch = self.executable_arch
    script_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script_lines)
1021 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the job script that modifies the FrameworkJob Report.

    Nothing is emitted (empty string) unless self.copy_data == 1.  The
    fragment chooses FOR_LFN from the stage-out status, runs
    ModifyJobReport.py on crab_fjr_$NJob.xml and, on success, replaces the
    report with NewFrameworkJobReport.xml.  With self.publish_data == 1
    the processed dataset name is exported and passed to the tool as well.
    nj is not used.
    """
    if self.copy_data != 1:
        return ''

    tool = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    pieces = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + tool + '\n',
        'echo "SE = $SE"\n',
        'echo "SE_PATH = $SE_PATH"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    report_args = ('fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN'
                   ' PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily'
                   ' ApplicationName $executable cmssw_version $CMSSW_VERSION'
                   ' psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list')
    if self.publish_data == 1:
        pieces.append('ProcessedDataset=' + self.processedDataset + '\n')
        pieces.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        report_args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    pieces.extend([
        'echo "' + tool + ' ' + report_args + '"\n',
        tool + ' ' + report_args + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(pieces)
1067 ewv 1.283
def wsParseFJR(self):
    """
    Return the job-script fragment that parses the FrameworkJobReport.

    The fragment runs parseCrabFjr.py on crab_fjr_$NJob.xml (when both
    exist) to obtain executable_exit_status, reports the outcome, calls
    func_exit for failures other than 50115, 50117 and 30001, and finally
    records the status as ExeExitCode and job_exit_code.  self.debugWrap
    is appended to the first parser invocation; with
    self.debug_wrapper == 1 the parser output is echoed too.
    """
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml'

    pieces = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        pieces.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    pieces.extend([
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # The verification of the processed input files (which set exit
        # status 30001 when the job report and the expected input list
        # differed) used to be emitted here.  It was disabled because it
        # no longer works with the current job arguments.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])

    return ''.join(pieces)
1140    
def setParam_(self, param, value):
    """Store value under the key param in self._params."""
    params = self._params
    params[param] = value
1143    
def getParams(self):
    """Return the self._params dictionary itself (not a copy)."""
    params = self._params
    return params
1146 gutsche 1.8
def outList(self, list=False):
    """
    Return the job-script fragment that exports the expected output files.

    The fragment echoes and exports filesToCheck, a space separated list
    made of the numbered sandbox files (plus the numbered user output
    files when self.return_data == 1) followed by the job stdout and
    stderr names.  A warning is logged when self.output_file is empty.

    When list is true, self.output_file is returned instead of the
    script text.  The parameter name shadows the builtin but is kept
    because it is part of the method's interface.
    """
    txt = ''
    txt += 'echo ">>> list of expected files on output sandbox"\n'
    stdout = 'CMSSW_$NJob.stdout'
    stderr = 'CMSSW_$NJob.stderr'
    if len(self.output_file) <= 0:
        msg = "WARNING: no output files name have been defined!!\n"
        msg += "\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    if self.return_data == 1:
        expected = self.output_file + self.output_file_sandbox
    else:
        expected = self.output_file_sandbox
    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    # stdout/stderr are always expected back, whatever return_data says
    listOutFiles.append(stdout)
    listOutFiles.append(stderr)

    # str.join replaces the deprecated string.join module function
    joined = ' '.join(listOutFiles)
    txt += 'echo "output files: ' + joined + '"\n'
    txt += 'filesToCheck="' + joined + '"\n'
    txt += 'export filesToCheck\n'

    if list:
        return self.output_file
    return txt