ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.341
Committed: Tue Oct 13 08:21:23 2009 UTC (15 years, 6 months ago) by spiga
Content type: text/x-python
Branch: MAIN
Changes since 1.340: +5 -2 lines
Log Message:
fill outputBaseName field

File Contents

# User Rev Content
1 ewv 1.327
2 spiga 1.341 __revision__ = "$Id: cms_cmssw.py,v 1.340 2009/10/08 15:15:17 mcinquil Exp $"
3     __version__ = "$Revision: 1.340 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure a CMSSW job type from the user's configuration.

    Reads the CRAB/GRID/CMSSW/USER settings from cfg_params, validates
    them, runs data discovery and location when a dataset path is given,
    invokes the job splitter and, when isNew is true, rewrites the
    parameter set and builds the input sandbox tarball.

    cfg_params  -- mapping of 'SECTION.key' -> value (accessed with get(),
                   has_key() and [])
    ncjobs      -- number of jobs requested to be created
    skip_blocks -- forwarded to DataDiscovery
    isNew       -- true the first time the task is created

    Raises CrabException when a setting is missing or invalid.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    self.local = common.scheduler.name().upper() in ['LSF', 'CAF', 'CONDOR', 'SGE', 'PBS']
    size = 9.5
    if self.server or self.local:
        # effectively disables the tarball size limit
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non-numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # An unset script_exe is None (see the get() above), so test for
    # falsiness: comparing against '' never matched and let a task with
    # no dataset, no pset and no script slip through.
    if self.datasetPath == None and self.pset == None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not pattern[0] == "/":
                dirname = "."
            files = []
            if pattern.find("*") > -1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + pattern)
            else:
                files.append(pattern)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: " + fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # Entries are stripped here; str.strip() returns a new string, so
    # calling it without using the result left the whitespace in place.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for seed in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seed.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for seed in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seed.strip())

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        self.processedDataset = cfg_params['USER.publish_data_name']
        # NOTE: a check on the length of the publication name (user name +
        # primary dataset + processed dataset + output file name) used to
        # follow here; it was disabled and is not performed.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0        # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}        # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = []  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end
    self.conf['blockSites'] = blockSites

    ## Select Splitting
    splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

    if self.selectNoInput:
        if self.pset == None:
            self.algo = 'ForScript'
        else:
            self.algo = 'NoInput'
            self.conf['managedGenerators'] = self.managedGenerators
            self.conf['generator'] = self.generator
    elif self.ads:
        self.algo = 'LumiBased'
    elif splitByRun == 1:
        self.algo = 'RunBased'
    else:
        self.algo = 'EventBased'
    common.logger.debug("Job splitting method: %s" % self.algo)

    splitter = JobSplitter(self.cfg_params, self.conf)
    self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset != None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
301 spiga 1.293
302    
303     def ModifyPset(self):
304     import PsetManipulator as pp
305 ewv 1.335
306     # If pycfg_params set, fake out the config script
307     # to make it think it was called with those args
308     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
309     if pycfg_params:
310     trueArgv = sys.argv
311     sys.argv = [self.pset]
312     sys.argv.extend(pycfg_params.split(' '))
313 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
314 ewv 1.335 if pycfg_params: # Restore original sys.argv
315     sys.argv = trueArgv
316    
317 spiga 1.293 try:
318     # Add FrameworkJobReport to parameter-set, set max events.
319     # Reset later for data jobs by writeCFG which does all modifications
320 ewv 1.295 PsetEdit.maxEvent(1)
321 spiga 1.293 PsetEdit.skipEvent(0)
322     PsetEdit.psetWriter(self.configFilename())
323     ## If present, add TFileService to output files
324     if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
325     tfsOutput = PsetEdit.getTFileService()
326     if tfsOutput:
327     if tfsOutput in self.output_file:
328 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
329 spiga 1.293 else:
330     outfileflag = True #output found
331     self.output_file.append(tfsOutput)
332 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
333 spiga 1.293 pass
334     pass
335 ewv 1.321 # If present and requested, add PoolOutputModule to output files
336 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
337 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
338     if edmOutput:
339 ewv 1.321 for outputFile in edmOutput:
340     if outputFile in self.output_file:
341 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
342 ewv 1.321 else:
343     self.output_file.append(outputFile)
344     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
345     # not requested, check anyhow to avoid accidental T2 overload
346 slacapra 1.297 else:
347 ewv 1.321 if edmOutput:
348     missedFiles = []
349     for outputFile in edmOutput:
350     if outputFile not in self.output_file:
351     missedFiles.append(outputFile)
352     if missedFiles:
353     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
354     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
355     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
356     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
357     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
358     common.logger.info(msg)
359 spiga 1.322 else :
360 ewv 1.321 raise CrabException(msg)
361 ewv 1.301
362     if (PsetEdit.getBadFilesSetting()):
363     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
364 spiga 1.304 common.logger.info(msg)
365 ewv 1.301
366 slacapra 1.297 except CrabException, msg:
367 spiga 1.304 common.logger.info(str(msg))
368 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
369 spiga 1.293 raise CrabException(msg)
370    
371 gutsche 1.3
372 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
373    
374 slacapra 1.86 import DataDiscovery
375     import DataLocation
376 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
377 gutsche 1.3
378     datasetPath=self.datasetPath
379    
380 slacapra 1.1 ## Contact the DBS
381 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
382 slacapra 1.1 try:
383 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
384 slacapra 1.1 self.pubdata.fetchDBSInfo()
385    
386 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
387 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
388     raise CrabException(msg)
389 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
390 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
391     raise CrabException(msg)
392 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
393 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
394 slacapra 1.1 raise CrabException(msg)
395    
396 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
397 slacapra 1.270 #print self.filesbyblock
398 spiga 1.269 self.conf['pubdata']=self.pubdata
399 gutsche 1.3
400 slacapra 1.1 ## get max number of events
401 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
402 slacapra 1.1
403     ## Contact the DLS and build a list of sites hosting the fileblocks
404     try:
405 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
406 gutsche 1.6 dataloc.fetchDLSInfo()
407 slacapra 1.263
408 slacapra 1.41 except DataLocation.DataLocationError , ex:
409 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
410     raise CrabException(msg)
411 ewv 1.131
412 slacapra 1.1
413 slacapra 1.270 unsorted_sites = dataloc.getSites()
414     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
415     for lfn in self.filesbyblock.keys():
416     if unsorted_sites.has_key(lfn):
417     sites[lfn]=unsorted_sites[lfn]
418     else:
419     sites[lfn]=[]
420    
421 slacapra 1.264 if len(sites)==0:
422 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
423     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
424     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
425 slacapra 1.264 raise CrabException(msg)
426    
427 gutsche 1.35 allSites = []
428     listSites = sites.values()
429 slacapra 1.63 for listSite in listSites:
430     for oneSite in listSite:
431 gutsche 1.35 allSites.append(oneSite)
432 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
433 ewv 1.295
434 gutsche 1.3
435 gutsche 1.92 # screen output
436 spiga 1.304 common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
437 gutsche 1.92
438 gutsche 1.35 return sites
439 ewv 1.131
440 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Register the jobs computed by the splitter.

    For each of the self.dict['njobs'] jobs the job id, arguments and
    destination are collected; per-job argument dictionaries are written
    to the XML arguments file and the job records are updated through
    common._db.updateJob_().

    jobParams  -- ignored: the arguments are always taken from
                  self.dict['args'] (kept for interface compatibility)
    firstJobID -- offset added to the 0-based job index; job ids are
                  numbered from firstJobID + 1

    Raises CrabException when there are no jobs, or more than 500 jobs
    outside server/local mode.
    """
    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    # (padding keeps jobParams[index] valid for every job below, even if
    # the splitter returned fewer than njobs argument entries)
    for dummy in range(njobs):
        jobParams.append("")

    # The SE name mapper does not depend on the job: build it once
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID = []
    listField = []
    listDictions = []
    concString = ' '
    exist = os.path.exists(self.argsFile)
    for index in range(njobs):
        jobNumber = index + int(firstJobID) + 1
        jobArgs = jobParams[index]
        listID.append(jobNumber)

        # just for debug
        str_argu = str(jobNumber)
        if len(jobArgs):
            argu = {'JobID': jobNumber}
            for pos in range(len(jobArgs)):
                argu[self.dict['params'][pos]] = jobArgs[pos]
            if len(jobArgs) == 1:
                self.NumEvents = jobArgs[0]
            str_argu += concString.join(jobArgs)
            listDictions.append(argu)

        job_ToSave = {}
        job_ToSave['arguments'] = str(jobNumber)
        job_ToSave['dlsDestination'] = self.jobDestination[index]
        listField.append(job_ToSave)

        msg = "Job %s Arguments: %s\n"%(str(jobNumber),str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)

    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
492 ewv 1.313
493 spiga 1.320 # def zipXMLfile(self):
494 ewv 1.313
495 spiga 1.320 # import tarfile
496     # try:
497     # tar = tarfile.open(self.tarNameWithPath, "a")
498     # tar.add(self.argsFile, os.path.basename(self.argsFile))
499     # tar.close()
500     # except IOError, exc:
501     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
502     # msg += str(exc)
503     # raise CrabException(msg)
504     # except tarfile.TarError, exc:
505     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
506     # msg += str(exc)
507     # raise CrabException(msg)
508 ewv 1.325
def CreateXML(self):
    """
    Create the XML arguments file (self.argsFile) containing only the
    root node named self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is on disk before it is re-read
        outfile.close()
    return
515 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add entries to the xml file: one 'Job' node per dictionary of
    listDictions, whose items are passed to IMProvNode as keyword
    arguments. The file self.argsFile is loaded, extended and rewritten.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # close explicitly instead of relying on garbage collection
        outfile.close()
    return
531 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitting result held in self.dict.
    """
    splitting = self.dict
    return splitting['njobs']
534 gutsche 1.3
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball.

    The path is recomputed from the work space and stored in
    self.tgzNameWithPath. When no file exists there yet, buildTar_(exe)
    is invoked first and the whitespace-stripped path is returned.
    """
    tarballPath = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarballPath
    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    # already there: reuse it
    return self.tgzNameWithPath
547 slacapra 1.1
548     def buildTar_(self, executable):
549    
550     # First of all declare the user Scram area
551     swArea = self.scram.getSWArea_()
552     swReleaseTop = self.scram.getReleaseTop_()
553 ewv 1.131
554 slacapra 1.1 ## check if working area is release top
555     if swReleaseTop == '' or swArea == swReleaseTop:
556 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
557 slacapra 1.1 return
558    
559 slacapra 1.61 import tarfile
560     try: # create tar ball
561 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
562 slacapra 1.61 ## First find the executable
563 slacapra 1.86 if (self.executable != ''):
564 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
565     if ( not exeWithPath ):
566     raise CrabException('User executable '+executable+' not found')
567 ewv 1.131
568 slacapra 1.61 ## then check if it's private or not
569     if exeWithPath.find(swReleaseTop) == -1:
570     # the exe is private, so we must ship
571 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
572 slacapra 1.61 path = swArea+'/'
573 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
574     if exeWithPath.find(path) >= 0 :
575     exe = string.replace(exeWithPath, path,'')
576 slacapra 1.129 tar.add(path+exe,exe)
577 corvo 1.85 else :
578     tar.add(exeWithPath,os.path.basename(executable))
579 slacapra 1.61 pass
580     else:
581     # the exe is from release, we'll find it on WN
582     pass
583 ewv 1.131
584 slacapra 1.61 ## Now get the libraries: only those in local working area
585 slacapra 1.256 tar.dereference=True
586 slacapra 1.61 libDir = 'lib'
587     lib = swArea+'/' +libDir
588 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
589 slacapra 1.61 if os.path.exists(lib):
590     tar.add(lib,libDir)
591 ewv 1.131
592 slacapra 1.61 ## Now check if module dir is present
593     moduleDir = 'module'
594     module = swArea + '/' + moduleDir
595     if os.path.isdir(module):
596     tar.add(module,moduleDir)
597 slacapra 1.256 tar.dereference=False
598 slacapra 1.61
599     ## Now check if any data dir(s) is present
600 spiga 1.179 self.dataExist = False
601 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
602 slacapra 1.206 while len(todo_list):
603     entry, name = todo_list.pop()
604 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
605 slacapra 1.206 continue
606 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
607 slacapra 1.206 entryPath = entry + '/'
608 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
609 slacapra 1.206 if name == 'data':
610     self.dataExist=True
611 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
612 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
613 slacapra 1.206 pass
614     pass
615 ewv 1.182
616 spiga 1.179 ### CMSSW ParameterSet
617     if not self.pset is None:
618     cfg_file = common.work_space.jobDir()+self.configFilename()
619 ewv 1.182 tar.add(cfg_file,self.configFilename())
620 ewv 1.313
621 spiga 1.309 try:
622     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
623     tar.add(crab_cfg_file,'crab.cfg')
624     except:
625     pass
626 fanzago 1.93
627 fanzago 1.152 ## Add ProdCommon dir to tar
628 slacapra 1.211 prodcommonDir = './'
629     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
630 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
631 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
632     'WMCore/__init__.py','WMCore/Algorithms']
633 slacapra 1.214 for file in neededStuff:
634     tar.add(prodcommonPath+file,prodcommonDir+file)
635 spiga 1.179
636     ##### ML stuff
637     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
638     path=os.environ['CRABDIR'] + '/python/'
639     for file in ML_file_list:
640     tar.add(path+file,file)
641    
642     ##### Utils
643 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
644 spiga 1.179 for file in Utils_file_list:
645     tar.add(path+file,file)
646 ewv 1.131
647 ewv 1.182 ##### AdditionalFiles
648 slacapra 1.253 tar.dereference=True
649 spiga 1.179 for file in self.additional_inbox_files:
650     tar.add(file,string.split(file,'/')[-1])
651 slacapra 1.253 tar.dereference=False
652 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
653 ewv 1.182
654 slacapra 1.61 tar.close()
655 mcinquil 1.241 except IOError, exc:
656 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
657 spiga 1.304 msg += str(exc)
658     raise CrabException(msg)
659 mcinquil 1.241 except tarfile.TarError, exc:
660 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
661 spiga 1.304 msg += str(exc)
662     raise CrabException(msg)
663 spiga 1.300
664 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
665     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
666 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
667 ewv 1.250 +'MB input sandbox limit \n'
668 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
669     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
670 mcinquil 1.336 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServerForUsers#Server_available_for_users'
671 spiga 1.238 raise CrabException(msg)
672 gutsche 1.72
673 slacapra 1.61 ## create tar-ball with ML stuff
674 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text, in order:
      - sets up the CMS software environment for the middleware found in
        $middleware (LCG/CAF/LSF, OSG, SGE, ARC, PBS);
      - creates the CMSSW project area with scram and loads its runtime;
      - checks that at least self.argsList arguments were passed;
      - defines the dataset variables and, when a pset is used, copies
        the job configuration into place as pset.py.
    """
    psetName = 'pset.py'

    # Prepare JobType-independent part
    txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
    txt += 'echo ">>> setup environment"\n'
    txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
    txt += ' if [ ! $? == 0 ] ;then\n'
    txt += ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' job_exit_code=10016\n'
    txt += ' func_exit\n'
    txt += ' fi\n'
    txt += ' echo ">>> Created working directory: $WORKING_DIR"\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
    txt += self.wsSetupCMSOSGEnvironment_()

    # SGE, ARC and PBS all reuse the LCG-style setup.  The PBS branch
    # used to test for SGE a second time, which made it unreachable.
    for middleware in ('SGE', 'ARC', 'PBS'):
        txt += 'elif [ $middleware == ' + middleware + ' ]; then\n'
        txt += self.wsSetupCMSLCGEnvironment_()

    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo ">>> specific cmssw setup environment:"\n'
    txt += 'echo "CMSSW_VERSION = ' + self.version + '"\n'
    txt += scram + ' project CMSSW ' + self.version + '\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "ERROR ==> CMSSW ' + self.version + ' not found on `hostname`" \n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'
    txt += 'cd ' + self.version + '\n'
    txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
    txt += 'if [ $? != 0 ] ; then\n'
    txt += ' echo "ERROR ==> Problem with the command: "\n'
    # '\\`' emits a backslash-escaped backtick so the shell prints the
    # command instead of executing it.
    txt += ' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n'
    txt += ' job_exit_code=10034\n'
    txt += ' func_exit\n'
    txt += 'fi \n'

    # Handle the arguments:
    txt += "\n"
    txt += "## number of arguments (first argument always jobnumber)\n"
    txt += "\n"
    txt += "if [ $nargs -lt " + str(self.argsList) + " ]\n"
    txt += "then\n"
    txt += " echo 'ERROR ==> Too few arguments' +$nargs+ \n"
    txt += ' job_exit_code=50113\n'
    txt += " func_exit\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        txt += '\n'
        txt += 'DatasetPath=' + self.datasetPath + '\n'

        txt += 'PrimaryDataset=' + self.primaryDataset + '\n'
        txt += 'DataTier=' + self.dataTier + '\n'
        txt += 'ApplicationFamily=cmsRun\n'
    else:
        txt += 'DatasetPath=MCDataTier\n'
        txt += 'PrimaryDataset=null\n'
        txt += 'DataTier=null\n'
        txt += 'ApplicationFamily=MCDataTier\n'

    if self.pset is not None:
        pset = os.path.basename(job.configFilename())
        txt += '\n'
        txt += 'cp $RUNTIME_AREA/' + pset + ' .\n'

        txt += 'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n'
        txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
        txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
        txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'

        # The job configuration is always run under the fixed name pset.py
        txt += 'mv -f ' + pset + ' ' + psetName + '\n'
    else:
        # NOTE(review): the two exports below are placed in the no-pset
        # branch; the listing this was taken from does not preserve
        # indentation - confirm the nesting against the repository.
        txt += '\n'
        if self.AdditionalArgs:
            txt += 'export AdditionalArgs="%s"\n' % (self.AdditionalArgs)
        if int(self.NumEvents) != 0:
            txt += 'export MaxEvents=%s\n' % str(self.NumEvents)
    return txt
781 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Put in the script the commands to unpack the user tarball found in
    $RUNTIME_AREA and to prepend $RUNTIME_AREA to PYTHONPATH.

    Nothing but the header comment is returned when the tarball
    (self.tgzNameWithPath) does not exist on the submitting host.
    The 'nj' argument is not used.
    """

    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
        txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
        # Record tar's exit status immediately: with the debug listing
        # emitted first, $? would hold the status of 'ls' instead.
        txt += 'untar_status=$? \n'
        if self.debug_wrapper == 1:
            txt += 'ls -Al \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
        txt += ' job_exit_code=$untar_status\n'
        txt += ' func_exit\n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        txt += '\n'
        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
        txt += 'else\n'
        txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
        txt += 'fi\n'
        # Report the resulting value in both cases, not only when
        # PYTHONPATH was already set.
        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
        txt += '\n'

    return txt
816 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Put in the script the commands that move the unpacked user software
    (lib/, module/, optionally src/ and the additional input files) from
    $RUNTIME_AREA into the current directory, prepend $RUNTIME_AREA to
    PYTHONPATH and, when a pset is used, compute PSETHASH with
    edmConfigHash.

    The 'nj' argument is not used.
    """

    txt = '\n#Written by cms_cmssw::wsBuildExe\n'
    txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'

    txt += 'rm -r lib/ module/ \n'
    txt += 'mv $RUNTIME_AREA/lib/ . \n'
    txt += 'mv $RUNTIME_AREA/module/ . \n'
    if self.dataExist == True:
        txt += 'rm -r src/ \n'
        txt += 'mv $RUNTIME_AREA/src/ . \n'
    # Additional input files are moved next to the software, by basename
    for inbox_file in self.additional_inbox_files:
        txt += 'mv $RUNTIME_AREA/' + os.path.basename(inbox_file) + ' . \n'

    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'fi\n'
    # Report the resulting value in both cases, not only when
    # PYTHONPATH was already set.
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += '\n'

    if self.pset is not None:
        psetName = 'pset.py'

        txt += '\n'
        # NOTE(review): extent of the debug-only section reconstructed;
        # the listing this was taken from does not preserve indentation.
        if self.debug_wrapper == 1:
            txt += 'echo "***** cat ' + psetName + ' *********"\n'
            txt += 'cat ' + psetName + '\n'
            txt += 'echo "****** end ' + psetName + ' ********"\n'
            txt += '\n'
            txt += 'echo "***********************" \n'
            txt += 'which edmConfigHash \n'
            txt += 'echo "***********************" \n'
        txt += 'edmConfigHash ' + psetName + ' \n'
        txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
        txt += 'echo "PSETHASH = $PSETHASH" \n'
        #### FEDE temporary fix for noEdm files #####
        txt += 'if [ -z "$PSETHASH" ]; then \n'
        txt += ' export PSETHASH=null\n'
        txt += 'fi \n'
        #############################################
        txt += '\n'
    return txt
869 slacapra 1.1
870 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: "sh " when a user script
    (self.scriptExe) is configured, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
876 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the command given by
    executableName(): the user script followed by the job number and
    the additional arguments, or the job-report/pset options when no
    user script is configured.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    script_args = " $NJob $AdditionalArgs"
    return self.scriptExe + script_args
882 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The tarball and the arguments file are included only when they
    exist on disk; the job script is always appended last.
    The 'nj' argument is not used.
    """
    optional_files = [self.tgzNameWithPath, self.argsFile]
    inp_box = [path for path in optional_files if os.path.isfile(path)]
    script_path = common.work_space.jobDir() + self.scriptName
    inp_box.append(script_path)
    return inp_box
894    
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    Every user declared output file (regular and sandbox-only) is
    passed through numberFile() with the 1-based job number nj + 1.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
906    
907    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    For every entry of self.output_file the script moves the file to
    its job-numbered name (numberFile(name, '$NJob')) and leaves a
    symbolic link with the original name in $RUNTIME_AREA.  A missing
    file sets job_exit_code=60302.  Finally the shell variable
    file_list is set to the comma separated numbered names under
    $SOFTWARE_DIR and the script changes to $RUNTIME_AREA.
    The 'nj' argument is not used.
    """

    txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'

    # The scheduler does not change while looping over the files
    is_condor_g = common.scheduler.name().upper() == 'CONDOR_G'

    for fileWithSuffix in self.output_file:
        output_file_num = numberFile(fileWithSuffix, '$NJob')
        txt += '\n'
        txt += '# check output file\n'
        txt += 'if [ -e ./' + fileWithSuffix + ' ] ; then\n'
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            txt += ' mv ' + fileWithSuffix + ' ' + output_file_num + '\n'
            txt += ' ln -s `pwd`/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        else:
            txt += ' mv ' + fileWithSuffix + ' $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' ln -s $RUNTIME_AREA/' + output_file_num + ' $RUNTIME_AREA/' + fileWithSuffix + '\n'
        txt += 'else\n'
        txt += ' job_exit_code=60302\n'
        txt += ' echo "WARNING: Output file ' + fileWithSuffix + ' not found"\n'
        if is_condor_g:
            txt += ' if [ $middleware == OSG ]; then \n'
            txt += ' echo "prepare dummy output file"\n'
            txt += ' echo "Processing of job output failed" > $RUNTIME_AREA/' + output_file_num + '\n'
            txt += ' fi \n'
        txt += 'fi\n'

    file_list = [numberFile('$SOFTWARE_DIR/' + fileWithSuffix, '$NJob')
                 for fileWithSuffix in self.output_file]

    # str.join replaces the deprecated string.join()
    txt += 'file_list="' + ','.join(file_list) + '"\n'
    txt += '\n'
    txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
    txt += 'echo ">>> current directory content:"\n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al\n'
    txt += '\n'
    txt += 'cd $RUNTIME_AREA\n'
    txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
    return txt
954    
def getRequirements(self, nj=None):
    """
    return job requirements to add to jdl files

    The result is the ' && ' conjunction of: the CMSSW version tag and
    the architecture tag (each only when set), the outbound network
    requirement and, for the glite/glitecoll schedulers, the CE status
    requirement.  The 'nj' argument is not used; its default is None
    rather than a shared mutable list.
    """
    runtime_env = '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'

    # Collect the clauses and join them once, so the expression never
    # starts with a dangling ' && ' when self.version is empty.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + runtime_env)
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + runtime_env)

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ('glitecoll', 'glite'):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept: it was part of the original text
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
978 gutsche 1.3
def configFilename(self):
    """ return the config filename: the value of self.name() plus '.py' """
    base_name = self.name()
    return base_name + '.py'
982 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on OSG: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for self.version, exiting
    with job_exit_code=10020 when that file is missing.
    """
    arch = self.executable_arch
    setup_script = '$OSG_APP/cmssoft/cms/cmsset_default.sh'
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f ' + setup_script + ' ] ;then\n',
        ' # Use ' + setup_script + ' to setup cms software\n',
        ' source ' + setup_script + ' ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> ' + setup_script + ' file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(pieces)
1006 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the execution
    environment on LCG-like sites: it exports SCRAM_ARCH and BUILD_ARCH
    and sources $VO_CMS_SW_DIR/cmsset_default.sh, setting a dedicated
    job_exit_code (10031, 10020 or 10032) for each failure.
    """
    arch = self.executable_arch
    pieces = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        # the software area must be advertised by the site
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(pieces)
1040 gutsche 1.5
def wsModifyReport(self, nj):
    """
    insert the part of the script that modifies the FrameworkJob Report

    An empty string is returned unless self.copy_data == 1.  Otherwise
    the script runs ModifyJobReport.py on crab_fjr_$NJob.xml and, on
    success, replaces the report with NewFrameworkJobReport.xml.  When
    self.publish_data == 1 the processed dataset is passed as well.
    The 'nj' argument is not used.
    """
    if not (self.copy_data == 1):
        return ''

    modify_cmd = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '
    args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list'

    pieces = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n',
        'echo "SE = $SE"\n',
        #### FEDE changing SE_PATH with the endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    if self.publish_data == 1:
        pieces.append('ProcessedDataset=' + self.processedDataset + '\n')
        pieces.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        args = args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    pieces.extend([
        'echo "' + modify_cmd + str(args) + '"\n',
        modify_cmd + str(args) + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(pieces)
1086 ewv 1.283
def wsParseFJR(self):
    """
    Parse the FrameworkJobReport to obtain useful infos

    Returns the part of the job script that runs parseCrabFjr.py on
    $RUNTIME_AREA/crab_fjr_$NJob.xml (once for the dashboard, once to
    extract the exit code), reports the outcome and stores the exit
    status in job_exit_code.  Any status other than 0, 50115, 50117 and
    30001 makes the script call func_exit.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n'
    if self.debug_wrapper == 1:
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    # message used to repeat "not available." twice
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
    txt += ' if [ $executable_exit_status -eq 0 ];then\n'
    txt += ' echo ">>> Executable succeded $executable_exit_status"\n'
    # The verification of the processed input files that used to be
    # emitted here was disabled (it no longer works with the current job
    # arguments) and has been dropped.  It was the only place setting
    # exit status 30001, which the check below still tolerates.
    txt += ' fi\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n'
    txt += ' echo ">>> Executable failed $executable_exit_status"\n'
    txt += ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += ' job_exit_code=$executable_exit_status\n'
    txt += ' func_exit\n'
    txt += 'fi\n\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1159    
def setParam_(self, param, value):
    """ store 'value' under the key 'param' in the self._params dictionary """
    params = self._params
    params[param] = value
1162    
def getParams(self):
    """ return the self._params dictionary itself (not a copy) """
    params = self._params
    return params
1165 gutsche 1.8
def outList(self, list=False):
    """
    Build the list of files expected in the output sandbox.

    By default returns the part of the job script that echoes the
    expected files and exports them in the shell variable filesToCheck.
    When 'list' is true, self.output_file is returned instead.  In both
    cases self.output_file is stored in the task database under
    'outfileBasename'.

    The parameter name shadows the builtin 'list'; it is kept because
    callers may pass it by keyword.
    """
    txt = ''
    txt += 'echo ">>> list of expected files on output sandbox"\n'
    stdout = 'CMSSW_$NJob.stdout'
    stderr = 'CMSSW_$NJob.stderr'
    if len(self.output_file) <= 0:
        msg = "WARNING: no output files name have been defined!!\n"
        msg += "\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    # Regular output files come back in the sandbox only when
    # return_data is set; sandbox-only files are always expected.
    if self.return_data == 1:
        expected = self.output_file + self.output_file_sandbox
    else:
        expected = self.output_file_sandbox
    listOutFiles = [numberFile(name, '$NJob') for name in expected]
    listOutFiles.append(stdout)
    listOutFiles.append(stderr)

    # str.join replaces the deprecated string.join()
    joined = ' '.join(listOutFiles)
    txt += 'echo "output files: ' + joined + '"\n'
    txt += 'filesToCheck="' + joined + '"\n'
    txt += 'export filesToCheck\n'

    taskinfo = {}
    taskinfo['outfileBasename'] = self.output_file
    common._db.updateTask_(taskinfo)

    if list:
        return self.output_file
    return txt