ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.353
Committed: Wed Feb 10 21:35:34 2010 UTC (15 years, 2 months ago) by spiga
Content type: text/x-python
Branch: MAIN
Changes since 1.352: +3 -3 lines
Log Message:
deprecated glite_slc5 and glitecoll

File Contents

# User Rev Content
1 ewv 1.327
2 spiga 1.353 __revision__ = "$Id: cms_cmssw.py,v 1.352 2010/02/04 16:35:25 ewv Exp $"
3     __version__ = "$Revision: 1.352 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure a CMSSW job type from the parameters read from crab.cfg.

    cfg_params  -- mapping of 'SECTION.key' names to configuration values
    ncjobs      -- number of jobs requested to be created
    skip_blocks -- stored on the instance; passed on to DataDiscovery by
                   DataDiscoveryAndLocation()
    isNew       -- when true, the PSet is modified (if one is given) and
                   the input sandbox tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is
    older than 2_1, when the dataset path, PSet, script or additional
    input files are missing or malformed, or when publication is
    requested without USER.publish_data_name.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  self.cfg_params.get('CRAB.use_server',0)
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # Too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if 'CMSSW.pset' not in cfg_params:
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs :
        self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # With neither a dataset nor a PSet, a user script is mandatory.
    # scriptExe is None (not '') when USER.script_exe is unset, so test
    # for any false value rather than comparing against ''.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if 'USER.additional_input_files' in cfg_params:
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for tmp in tmpAddFiles:
            tmp = tmp.strip()
            if not tmp:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not tmp.startswith("/"):
                dirname = "."
            files = []
            if tmp.find("*") > -1:
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for fileName in files:
                if not os.path.exists(fileName):
                    raise CrabException("Additional input file not found: "+fileName)
                self.additional_inbox_files.append(fileName.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # Names are comma separated; surrounding whitespace is removed.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if 'CMSSW.preserve_seeds' in cfg_params:
        self.preserveSeeds = [seed.strip() for seed in cfg_params['CMSSW.preserve_seeds'].split(',')]
    if 'CMSSW.increment_seeds' in cfg_params:
        self.incrementSeeds = [seed.strip() for seed in cfg_params['CMSSW.increment_seeds'].split(',')]

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if self.publish_data == 1:
        if 'USER.publish_data_name' not in cfg_params:
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = cfg_params['USER.publish_data_name']
        # NOTE: a check on the total length of the publication name
        # (user name + primary dataset + processed dataset + longest
        # output file name) used to follow here; it had been disabled
        # by wrapping it in a string literal and had no runtime effect.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0      # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}      # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation',0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites'] = blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators'] = self.managedGenerators
                self.conf['generator'] = self.generator
        elif self.ads:
            self.algo = 'LumiBased'
        elif splitByRun == 1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params,self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
303 spiga 1.293
304    
305     def ModifyPset(self):
306     import PsetManipulator as pp
307 ewv 1.335
308     # If pycfg_params set, fake out the config script
309     # to make it think it was called with those args
310     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
311     if pycfg_params:
312     trueArgv = sys.argv
313     sys.argv = [self.pset]
314     sys.argv.extend(pycfg_params.split(' '))
315 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
316 ewv 1.335 if pycfg_params: # Restore original sys.argv
317     sys.argv = trueArgv
318    
319 spiga 1.293 try:
320     # Add FrameworkJobReport to parameter-set, set max events.
321     # Reset later for data jobs by writeCFG which does all modifications
322 ewv 1.295 PsetEdit.maxEvent(1)
323 spiga 1.293 PsetEdit.skipEvent(0)
324     PsetEdit.psetWriter(self.configFilename())
325     ## If present, add TFileService to output files
326 slacapra 1.349 if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output',0)):
327 spiga 1.293 tfsOutput = PsetEdit.getTFileService()
328     if tfsOutput:
329     if tfsOutput in self.output_file:
330 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
331 spiga 1.293 else:
332     outfileflag = True #output found
333     self.output_file.append(tfsOutput)
334 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
335 spiga 1.293 pass
336     pass
337 ewv 1.321 # If present and requested, add PoolOutputModule to output files
338 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
339 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
340     if edmOutput:
341 ewv 1.321 for outputFile in edmOutput:
342     if outputFile in self.output_file:
343 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
344 ewv 1.321 else:
345     self.output_file.append(outputFile)
346     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
347     # not requested, check anyhow to avoid accidental T2 overload
348 slacapra 1.297 else:
349 ewv 1.321 if edmOutput:
350     missedFiles = []
351     for outputFile in edmOutput:
352     if outputFile not in self.output_file:
353     missedFiles.append(outputFile)
354     if missedFiles:
355     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
356     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
357     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
358     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
359     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
360     common.logger.info(msg)
361 spiga 1.322 else :
362 ewv 1.321 raise CrabException(msg)
363 ewv 1.301
364     if (PsetEdit.getBadFilesSetting()):
365     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
366 spiga 1.304 common.logger.info(msg)
367 ewv 1.301
368 slacapra 1.297 except CrabException, msg:
369 spiga 1.304 common.logger.info(str(msg))
370 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
371 spiga 1.293 raise CrabException(msg)
372    
373 gutsche 1.3
374 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
375    
376 slacapra 1.86 import DataDiscovery
377     import DataLocation
378 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
379 gutsche 1.3
380     datasetPath=self.datasetPath
381    
382 slacapra 1.1 ## Contact the DBS
383 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
384 slacapra 1.1 try:
385 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
386 slacapra 1.1 self.pubdata.fetchDBSInfo()
387    
388 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
389 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
390     raise CrabException(msg)
391 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
392 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
393     raise CrabException(msg)
394 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
395 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
396 slacapra 1.1 raise CrabException(msg)
397    
398 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
399 slacapra 1.270 #print self.filesbyblock
400 spiga 1.269 self.conf['pubdata']=self.pubdata
401 gutsche 1.3
402 slacapra 1.1 ## get max number of events
403 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
404 slacapra 1.1
405     ## Contact the DLS and build a list of sites hosting the fileblocks
406     try:
407 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
408 gutsche 1.6 dataloc.fetchDLSInfo()
409 slacapra 1.263
410 slacapra 1.41 except DataLocation.DataLocationError , ex:
411 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
412     raise CrabException(msg)
413 ewv 1.131
414 slacapra 1.1
415 slacapra 1.270 unsorted_sites = dataloc.getSites()
416     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
417     for lfn in self.filesbyblock.keys():
418     if unsorted_sites.has_key(lfn):
419     sites[lfn]=unsorted_sites[lfn]
420     else:
421     sites[lfn]=[]
422    
423 slacapra 1.264 if len(sites)==0:
424 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
425     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
426     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
427 slacapra 1.264 raise CrabException(msg)
428    
429 gutsche 1.35 allSites = []
430     listSites = sites.values()
431 slacapra 1.63 for listSite in listSites:
432     for oneSite in listSite:
433 gutsche 1.35 allSites.append(oneSite)
434 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
435 ewv 1.295
436 gutsche 1.3
437 gutsche 1.92 # screen output
438 ewv 1.352 if self.ads:
439     common.logger.info("Requested ADS %s has %s block(s)." %
440 ewv 1.350 (datasetPath, len(self.filesbyblock.keys())))
441     else:
442     common.logger.info("Requested dataset: " + datasetPath + \
443     " has " + str(self.maxEvents) + " events in " + \
444     str(len(self.filesbyblock.keys())) + " blocks.\n")
445 gutsche 1.92
446 gutsche 1.35 return sites
447 ewv 1.131
448 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Register the jobs computed by the splitter (self.dict) in the task
    database and in the arguments XML file.

    jobParams  -- ignored: it is immediately replaced by self.dict['args']
    firstJobID -- offset added to the 0-based job index; jobs are
                  numbered firstJobID+1 .. firstJobID+njobs

    Raises CrabException when there are no jobs to create, or when more
    than 500 jobs are requested without server mode or a local scheduler.
    """
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap

    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    # NOTE(review): this pads self.dict['args'] in place, so the list
    # grows on every call; kept as in the original.
    for i in range(njobs):
        jobParams.append("")

    cms_se = CmsSEMap()   # one mapper is enough for all jobs
    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        params = jobParams[index]
        destination = self.jobDestination[index]
        listID.append(jobNumber)

        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for i in range(len(params)):
                argu[self.dict['params'][i]] = params[i]
            if len(params) == 1:
                self.NumEvents = params[0]
            # just for debug: job number followed by its arguments
            str_argu += ' ' + ' '.join(params)
            listDictions.append(argu)

        job_ToSave = {}
        job_ToSave['arguments'] = '%d %d'%(jobNumber, 0)
        job_ToSave['dlsDestination'] = destination
        listField.append(job_ToSave)

        msg = "Job %s Arguments: %s\n"%(str(jobNumber),str_argu)
        msg += "\t Destination: %s "%(str(destination))
        SEDestination = [cms_se[dest] for dest in destination]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)

    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
500 ewv 1.313
501 spiga 1.320 # def zipXMLfile(self):
502 ewv 1.313
503 spiga 1.320 # import tarfile
504     # try:
505     # tar = tarfile.open(self.tarNameWithPath, "a")
506     # tar.add(self.argsFile, os.path.basename(self.argsFile))
507     # tar.close()
508     # except IOError, exc:
509     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
510     # msg += str(exc)
511     # raise CrabException(msg)
512     # except tarfile.TarError, exc:
513     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
514     # msg += str(exc)
515     # raise CrabException(msg)
516 ewv 1.325
def CreateXML(self):
    """
    Write a new arguments file (self.argsFile) holding only the root
    node named self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed before it is re-read
        outfile.close()
    return
523 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary in listDictions to the arguments
    xml file (self.argsFile) and rewrite the file. Each dictionary is
    passed to IMProvNode as keyword arguments.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # close explicitly so the file is flushed and the handle released
        outfile.close()
    return
539 ewv 1.131
def numberOfJobs(self):
    """
    Return the number of jobs computed by the splitter, or None when
    WMBS automation is enabled (self.automation != 0).
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
546 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball (lib and exe),
    building it with buildTar_ when the file does not exist yet.
    """
    tgzPath = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tgzPath
    if not os.path.exists(tgzPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tgzPath
559 slacapra 1.1
560     def buildTar_(self, executable):
561    
562     # First of all declare the user Scram area
563     swArea = self.scram.getSWArea_()
564     swReleaseTop = self.scram.getReleaseTop_()
565 ewv 1.131
566 slacapra 1.1 ## check if working area is release top
567     if swReleaseTop == '' or swArea == swReleaseTop:
568 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
569 slacapra 1.1 return
570    
571 slacapra 1.61 import tarfile
572     try: # create tar ball
573 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
574 slacapra 1.61 ## First find the executable
575 slacapra 1.86 if (self.executable != ''):
576 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
577     if ( not exeWithPath ):
578     raise CrabException('User executable '+executable+' not found')
579 ewv 1.131
580 slacapra 1.61 ## then check if it's private or not
581     if exeWithPath.find(swReleaseTop) == -1:
582     # the exe is private, so we must ship
583 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
584 slacapra 1.61 path = swArea+'/'
585 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
586     if exeWithPath.find(path) >= 0 :
587     exe = string.replace(exeWithPath, path,'')
588 slacapra 1.129 tar.add(path+exe,exe)
589 corvo 1.85 else :
590     tar.add(exeWithPath,os.path.basename(executable))
591 slacapra 1.61 pass
592     else:
593     # the exe is from release, we'll find it on WN
594     pass
595 ewv 1.131
596 slacapra 1.61 ## Now get the libraries: only those in local working area
597 slacapra 1.256 tar.dereference=True
598 slacapra 1.61 libDir = 'lib'
599     lib = swArea+'/' +libDir
600 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
601 slacapra 1.61 if os.path.exists(lib):
602     tar.add(lib,libDir)
603 ewv 1.131
604 slacapra 1.61 ## Now check if module dir is present
605     moduleDir = 'module'
606     module = swArea + '/' + moduleDir
607     if os.path.isdir(module):
608     tar.add(module,moduleDir)
609 slacapra 1.256 tar.dereference=False
610 slacapra 1.61
611     ## Now check if any data dir(s) is present
612 spiga 1.179 self.dataExist = False
613 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
614 slacapra 1.206 while len(todo_list):
615     entry, name = todo_list.pop()
616 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
617 slacapra 1.206 continue
618 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
619 slacapra 1.206 entryPath = entry + '/'
620 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
621 slacapra 1.206 if name == 'data':
622     self.dataExist=True
623 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
624 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
625 slacapra 1.206 pass
626     pass
627 ewv 1.182
628 spiga 1.179 ### CMSSW ParameterSet
629     if not self.pset is None:
630     cfg_file = common.work_space.jobDir()+self.configFilename()
631 ewv 1.182 tar.add(cfg_file,self.configFilename())
632 ewv 1.313
633 spiga 1.309 try:
634     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
635     tar.add(crab_cfg_file,'crab.cfg')
636     except:
637     pass
638 fanzago 1.93
639 fanzago 1.152 ## Add ProdCommon dir to tar
640 slacapra 1.211 prodcommonDir = './'
641     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
642 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
643 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
644     'WMCore/__init__.py','WMCore/Algorithms']
645 slacapra 1.214 for file in neededStuff:
646     tar.add(prodcommonPath+file,prodcommonDir+file)
647 spiga 1.179
648     ##### ML stuff
649     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
650     path=os.environ['CRABDIR'] + '/python/'
651     for file in ML_file_list:
652     tar.add(path+file,file)
653    
654     ##### Utils
655 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
656 spiga 1.179 for file in Utils_file_list:
657     tar.add(path+file,file)
658 ewv 1.131
659 ewv 1.182 ##### AdditionalFiles
660 slacapra 1.253 tar.dereference=True
661 spiga 1.179 for file in self.additional_inbox_files:
662     tar.add(file,string.split(file,'/')[-1])
663 slacapra 1.253 tar.dereference=False
664 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
665 ewv 1.182
666 slacapra 1.61 tar.close()
667 mcinquil 1.241 except IOError, exc:
668 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
669 spiga 1.304 msg += str(exc)
670     raise CrabException(msg)
671 mcinquil 1.241 except tarfile.TarError, exc:
672 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
673 spiga 1.304 msg += str(exc)
674     raise CrabException(msg)
675 spiga 1.300
676 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
677     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
678 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
679 ewv 1.250 +'MB input sandbox limit \n'
680 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
681     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
682 mcinquil 1.336 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServerForUsers#Server_available_for_users'
683 spiga 1.238 raise CrabException(msg)
684 gutsche 1.72
685 slacapra 1.61 ## create tar-ball with ML stuff
686 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script that prepares the
    execution environment for job 'nj'.

    The fragment is made of three consecutive pieces:
      * a middleware dispatch (LCG/CAF/LSF, OSG, SGE, ARC, PBS) that
        pulls in the matching CMS environment setup,
      * the scram project/runtime setup for self.version plus the
        check on the number of wrapper arguments,
      * dataset bookkeeping variables and, depending on whether a
        pset is configured, either the pset staging commands or the
        AdditionalArgs/MaxEvents exports.
    """
    psetName = 'pset.py'
    arch = self.executable_arch

    # JobType-independent part: middleware dispatch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupEnvironment\n',
        'echo ">>> setup environment"\n',
        'echo "set SCRAM ARCH to ' + arch + '"\n',
        'export SCRAM_ARCH=' + arch + '\n',
        'echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n',
        self.wsSetupCMSLCGEnvironment_(),
        'elif [ $middleware == OSG ]; then\n',
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
        self.wsSetupCMSOSGEnvironment_(),
    ]
    # SGE, ARC and PBS all reuse the LCG-style environment setup
    for middleware in ('SGE', 'ARC', 'PBS'):
        pieces.append('elif [ $middleware == ' + middleware + ' ]; then\n')
        pieces.append(self.wsSetupCMSLCGEnvironment_())
    pieces.append('fi\n')

    # JobType-specific part: scram project area and runtime
    scram = self.scram.commandName()
    release = self.version
    pieces.extend([
        '\n\n',
        'echo ">>> specific cmssw setup environment:"\n',
        'echo "CMSSW_VERSION = ' + release + '"\n',
        scram + ' project CMSSW ' + release + '\n',
        'status=$?\n',
        'if [ $status != 0 ] ; then\n',
        ' echo "ERROR ==> CMSSW ' + release + ' not found on `hostname`" \n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
        'cd ' + release + '\n',
        'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n',
        'if [ $? != 0 ] ; then\n',
        ' echo "ERROR ==> Problem with the command: "\n',
        ' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
    ])

    # Argument-count check emitted into the wrapper
    pieces.extend([
        "\n",
        "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n",
        "\n",
        "if [ $nargs -lt " + str(self.argsList) + " ]\n",
        "then\n",
        " echo 'ERROR ==> Too few arguments' +$nargs+ \n",
        ' job_exit_code=50113\n',
        " func_exit\n",
        "fi\n",
        "\n",
    ])

    # Job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        pieces.extend([
            '\n',
            'DatasetPath=' + self.datasetPath + '\n',
            'PrimaryDataset=' + self.primaryDataset + '\n',
            'DataTier=' + self.dataTier + '\n',
            'ApplicationFamily=cmsRun\n',
        ])
    else:
        pieces.extend([
            'DatasetPath=MCDataTier\n',
            'PrimaryDataset=null\n',
            'DataTier=null\n',
            'ApplicationFamily=MCDataTier\n',
        ])

    if self.pset != None:
        pset = os.path.basename(job.configFilename())
        pieces.extend([
            '\n',
            'cp $RUNTIME_AREA/' + pset + ' .\n',
            'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n',
            'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n',
            'echo "PreserveSeeds: <$PreserveSeeds>"\n',
            'echo "IncrementSeeds:<$IncrementSeeds>"\n',
            'mv -f ' + pset + ' ' + psetName + '\n',
        ])
    else:
        # no pset configured: only export the optional script settings
        pieces.append('\n')
        if self.AdditionalArgs:
            pieces.append('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            pieces.append('export MaxEvents=%s\n' % str(self.NumEvents))

    return ''.join(pieces)
796 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the part of the job wrapper script that unpacks the
    software tarball and adds $RUNTIME_AREA to PYTHONPATH.

    'nj' is accepted for interface symmetry with the other ws*
    methods and is not used here.

    When self.tgzNameWithPath does not exist on the submitting host
    only the header comment line is returned.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    lines = [
        'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n',
        'tar zxvf $RUNTIME_AREA/' + tarball + '\n',
        # Capture the tar exit status right away: any command emitted
        # between tar and this line would overwrite $?.
        'untar_status=$? \n',
    ]
    if self.debug_wrapper == 1:
        # Debug listing goes after the status capture so that a failed
        # untar is not masked by a successful 'ls'.
        lines.append('ls -Al \n')
    lines.extend([
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "ERROR ==> Untarring .tgz file failed"\n',
        ' job_exit_code=$untar_status\n',
        ' func_exit\n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    return txt + ''.join(lines)
831 ewv 1.170
832 fanzago 1.166 def wsBuildExe(self, nj=0):
833     """
834     Return the script fragment that moves the shipped lib/, module/ (and
835     optionally src/ and user files) from $RUNTIME_AREA into the current directory, extends PYTHONPATH and, when a pset is set, computes PSETHASH.
836     """
837    
838     txt = '\n#Written by cms_cmssw::wsBuildExe\n'
839     txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
840    
# Replace the lib/ and module/ directories with the ones shipped in $RUNTIME_AREA.
841 ewv 1.170 txt += 'rm -r lib/ module/ \n'
842     txt += 'mv $RUNTIME_AREA/lib/ . \n'
843     txt += 'mv $RUNTIME_AREA/module/ . \n'
# src/ is replaced only when self.dataExist is set.
844 spiga 1.186 if self.dataExist == True:
845     txt += 'rm -r src/ \n'
846     txt += 'mv $RUNTIME_AREA/src/ . \n'
# Each additional input file is moved by its basename.
847 ewv 1.182 if len(self.additional_inbox_files)>0:
848 spiga 1.179 for file in self.additional_inbox_files:
849 spiga 1.191 txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
850 slacapra 1.214 # txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
851     # txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
852 ewv 1.170
# NOTE(review): in the emitted shell the PYTHONPATH echo sits before 'fi', i.e. inside the else branch.
853 slacapra 1.211 txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
854 fanzago 1.166 txt += 'if [ -z "$PYTHONPATH" ]; then\n'
855 slacapra 1.211 txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
856 fanzago 1.166 txt += 'else\n'
857 slacapra 1.211 txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
858 fanzago 1.166 txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
859     txt += 'fi\n'
860     txt += '\n'
861    
# The PSETHASH computation below applies only when a pset is configured.
862 slacapra 1.302 if self.pset != None:
863 ewv 1.334 psetName = 'pset.py'
864    
865 slacapra 1.302 txt += '\n'
# NOTE(review): indentation is lost in this listing, so it is unclear how many of the
# following lines belong to the debug_wrapper branch -- confirm against the repository.
866     if self.debug_wrapper == 1:
867     txt += 'echo "***** cat ' + psetName + ' *********"\n'
868     txt += 'cat ' + psetName + '\n'
869     txt += 'echo "****** end ' + psetName + ' ********"\n'
870     txt += '\n'
871     txt += 'echo "***********************" \n'
872     txt += 'which edmConfigHash \n'
873     txt += 'echo "***********************" \n'
# edmConfigHash is emitted twice: once for its plain output and once to fill PSETHASH.
874 ewv 1.334 txt += 'edmConfigHash ' + psetName + ' \n'
875     txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
876 slacapra 1.302 txt += 'echo "PSETHASH = $PSETHASH" \n'
877     #### FEDE temporary fix for noEdm files #####
# An empty PSETHASH is replaced by the literal 'null'.
878     txt += 'if [ -z "$PSETHASH" ]; then \n'
879     txt += ' export PSETHASH=null\n'
880     txt += 'fi \n'
881     #############################################
882     txt += '\n'
883 fanzago 1.166 return txt
884 slacapra 1.1
885 ewv 1.131
def executableName(self):
    """
    Return the command that launches the payload: "sh " when a user
    script (self.scriptExe) is configured, else self.executable.
    """
    return "sh " if self.scriptExe else self.executable
891 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string handed to the executable.

    With a user script the script itself comes first, followed by the
    job number and the additional arguments; otherwise the framework
    job report and pset options are returned.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
897 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL input sandbox.

    The tarball and the arguments file are included only when they
    exist on disk; the job script path is always appended last.
    """
    candidates = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in candidates if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
909    
def outputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL output sandbox.

    Every user-declared output file (regular and sandbox ones) is
    numbered with the 1-based job index nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
921    
922    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script that renames the produced files.

    For every entry of self.output_file the script checks that the
    file exists, renames it with the $OutUniqueID suffix and leaves a
    symlink under the original name in $RUNTIME_AREA; a missing file
    sets job_exit_code=60302.  The comma separated list of renamed
    files is exported to the script as 'file_list'.

    'nj' is not used by this method.
    """
    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$OutUniqueID')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./' + fileWithSuffix + ' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv ' + fileWithSuffix + ' ' + output_file_num + '\n'
            txt += ' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        else:
            txt += ' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n'
        if common.scheduler.name().upper() == 'CONDOR_G':
            # On OSG a placeholder output file is written instead
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [numberFile('$SOFTWARE_DIR/' + name, '$OutUniqueID')
                 for name in self.output_file]

    # str.join replaces the deprecated string.join helper
    txt += 'file_list="' + ','.join(file_list) + '"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
969    
def getRequirements(self, nj=None):
    """
    Return the job requirements expression to add to the jdl files.

    'nj' is accepted for interface compatibility and is not used.

    The software version and architecture tags are added only when
    they are set; the clauses are joined with ' && ' so that the
    expression never starts with a dangling operator when the
    version is empty.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')
    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')
    req = ' && '.join(clauses)

    if common.scheduler.name() in ["glite"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            req += ' && (other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")'
        else:
            req += ' && other.GlueCEStateStatus == "Production" '

    return req
993 gutsche 1.3
def configFilename(self):
    """Return the config file name: the job type name plus '.py'."""
    base = self.name()
    return ''.join((base, '.py'))
997 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the script fragment, common to all CMS jobs, that sets up
    the CMS software environment on an OSG worker node by sourcing
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version.
    """
    arch = self.executable_arch
    script_lines = (
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    )
    return ''.join(script_lines)
1021 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the script fragment, common to all CMS jobs, that sets up
    the CMS software environment by sourcing
    $VO_CMS_SW_DIR/cmsset_default.sh, with an error exit for each
    step that can fail.
    """
    arch = self.executable_arch
    script_lines = (
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    )
    return ''.join(script_lines)
1055 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the script that modifies the FrameworkJob
    Report by running ModifyJobReport.py.

    Nothing is generated (empty string) unless self.copy_data is 1.
    When self.publish_data is 1 the processed dataset name is also
    exported and passed on to ModifyJobReport.py.  'nj' is not used.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ] || [ $StageOutExitStatus -eq 60308 ] ; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "SE = $SE"\n',
        # SE_PATH is taken from the stage-out endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $OutUniqueID for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list'
    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args = args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modifier + ' ' + str(args) + '"\n',
        modifier + ' ' + str(args) + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1101 ewv 1.283
def wsParseFJR(self):
    """
    Return the script fragment that parses the FrameworkJobReport
    with parseCrabFjr.py to obtain the executable exit status, and
    that aborts the job through func_exit when that status is not
    one of 0, 50115, 50117 or 30001.

    self.debugWrap is appended to the dashboard parsing command and
    self.debug_wrapper == 1 additionally echoes the parser output.
    """
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml'
    script = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        script.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    script.extend([
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # The verification of the list of processed files used to be
        # generated here; it is disabled because it no longer works
        # with the current job arguments.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])

    return ''.join(script)
1174    
def setParam_(self, param, value):
    """Store 'value' under the key 'param' in the job type parameters."""
    self._params.update({param: value})
1177    
def getParams(self):
    """Return the dictionary of parameters stored through setParam_."""
    params = self._params
    return params
1180 gutsche 1.8
1181 spiga 1.257 def outList(self,list=False):
1182 mcinquil 1.121 """
1183     Build the script lines that export 'filesToCheck', the list of files expected in the output sandbox, and record self.output_file in the task DB; returns self.output_file instead of the script text when 'list' is true.
1184     """
1185 spiga 1.169 txt = ''
1186     txt += 'echo ">>> list of expected files on output sandbox"\n'
1187 mcinquil 1.121 listOutFiles = []
# Names use $NJob, i.e. they are resolved by the shell when the wrapper runs.
1188 ewv 1.170 stdout = 'CMSSW_$NJob.stdout'
1189 spiga 1.169 stderr = 'CMSSW_$NJob.stderr'
# An empty output file list is only logged through common.logger.info; nothing is raised here.
1190 spiga 1.268 if len(self.output_file) <= 0:
1191     msg ="WARNING: no output files name have been defined!!\n"
1192     msg+="\tno output files will be reported back/staged\n"
1193 spiga 1.304 common.logger.info(msg)
1194 ewv 1.350
# NOTE(review): indentation is lost in this listing; it is unclear whether the sandbox loop and
# the stdout/stderr appends below belong to the return_data branch -- confirm against the repository.
1195 fanzago 1.148 if (self.return_data == 1):
# User output files are numbered with $OutUniqueID, sandbox files with $NJob.
1196 farinafa 1.348 for file in (self.output_file):
1197     listOutFiles.append(numberFile(file, '$OutUniqueID'))
1198     for file in (self.output_file_sandbox):
1199     listOutFiles.append(numberFile(file, '$NJob'))
1200     listOutFiles.append(stdout)
1201     listOutFiles.append(stderr)
1202    
1203 fanzago 1.161 txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1204 spiga 1.157 txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1205 spiga 1.169 txt += 'export filesToCheck\n'
# Side effect: the un-numbered output file names are stored under 'outfileBasename' via common._db.updateTask_.
1206 spiga 1.341 taskinfo={}
1207     taskinfo['outfileBasename'] = self.output_file
1208     common._db.updateTask_(taskinfo)
1209 ewv 1.276
# The 'list' parameter (which shadows the builtin) switches the return value to self.output_file.
1210 spiga 1.257 if list : return self.output_file
1211 ewv 1.170 return txt