ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.352.4.4
Committed: Tue May 4 10:36:18 2010 UTC (14 years, 11 months ago) by spiga
Content type: text/x-python
Branch: CRAB_2_7_1_branch
CVS Tags: CRAB_2_7_1_branch_firstMERGE
Changes since 1.352.4.3: +6 -4 lines
Log Message:
fix for bug 62338

File Contents

# User Rev Content
1 ewv 1.327
2 spiga 1.352.4.4 __revision__ = "$Id: cms_cmssw.py,v 1.352.4.3 2010/04/16 15:14:34 farinafa Exp $"
3     __version__ = "$Revision: 1.352.4.3 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Configure the CMSSW job type from the crab.cfg parameters.

    cfg_params  -- mapping of 'SECTION.key' -> value read from crab.cfg
    ncjobs      -- number of jobs requested to be created (kept in self.ncjobs)
    skip_blocks -- kept in self.skip_blocks and handed to DataDiscovery
    isNew       -- when true the PSet is modified and the input sandbox
                   tarball is prepared (done only the first time)

    Raises CrabException when a setting is missing, malformed or
    inconsistent with the others.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  self.cfg_params.get('CRAB.use_server',0)
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a non numeric field
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp =='':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower()=='none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4
    self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info('  Only file level, not lumi level, granularity is supported.')

    self.debugWrap=''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1: self.debugWrap='--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs : self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # scriptExe is None (not '') when USER.script_exe is unset, so test its
    # truth value: comparing against '' would never detect the missing script.
    if self.datasetPath == None and self.pset == None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        for pattern in cfg_params['USER.additional_input_files'].split(','):
            pattern = pattern.strip()
            if not pattern:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not pattern[0]=="/": dirname = "."
            if pattern.find("*")>-1:
                files = glob.glob(os.path.join(dirname, pattern))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+pattern)
            else:
                files = [pattern]
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: "+fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string: the stripped value must be the one stored
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        self.preserveSeeds = [s.strip() for s in cfg_params['CMSSW.preserve_seeds'].split(',')]
    if cfg_params.has_key('CMSSW.increment_seeds'):
        self.incrementSeeds = [s.strip() for s in cfg_params['CMSSW.increment_seeds'].split(',')]

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if (self.publish_data == 1):
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']
            # NOTE: a check on the length of the dataset name to publish used
            # to live here; it was disabled (kept as a string literal) and has
            # been dropped as dead code.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation',0))
    # NOTE(review): everything down to the splitter call is assumed to be
    # guarded by automation == 0 (numberOfJobs() only reads self.dict in
    # that case) -- confirm against the repository copy.
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites']=blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))

        if self.selectNoInput:
            if self.pset == None:
                self.algo = 'ForScript'
            else:
                # NOTE(review): generator settings assumed to belong to the
                # 'NoInput' branch only -- confirm
                self.algo = 'NoInput'
                self.conf['managedGenerators']=self.managedGenerators
                self.conf['generator']=self.generator
        elif self.ads or self.lumiMask:
            self.algo = 'LumiBased'
        elif splitByRun ==1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params,self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename= 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset != None: self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
304 spiga 1.293
305    
306     def ModifyPset(self):
307     import PsetManipulator as pp
308 ewv 1.335
309     # If pycfg_params set, fake out the config script
310     # to make it think it was called with those args
311     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
312     if pycfg_params:
313     trueArgv = sys.argv
314     sys.argv = [self.pset]
315     sys.argv.extend(pycfg_params.split(' '))
316 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
317 ewv 1.335 if pycfg_params: # Restore original sys.argv
318     sys.argv = trueArgv
319    
320 spiga 1.293 try:
321     # Add FrameworkJobReport to parameter-set, set max events.
322     # Reset later for data jobs by writeCFG which does all modifications
323 ewv 1.295 PsetEdit.maxEvent(1)
324 spiga 1.293 PsetEdit.skipEvent(0)
325     PsetEdit.psetWriter(self.configFilename())
326     ## If present, add TFileService to output files
327 slacapra 1.349 if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output',0)):
328 spiga 1.293 tfsOutput = PsetEdit.getTFileService()
329     if tfsOutput:
330     if tfsOutput in self.output_file:
331 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
332 spiga 1.293 else:
333     outfileflag = True #output found
334     self.output_file.append(tfsOutput)
335 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
336 spiga 1.293 pass
337     pass
338 ewv 1.321 # If present and requested, add PoolOutputModule to output files
339 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
340 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
341     if edmOutput:
342 ewv 1.321 for outputFile in edmOutput:
343     if outputFile in self.output_file:
344 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
345 ewv 1.321 else:
346     self.output_file.append(outputFile)
347     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
348     # not requested, check anyhow to avoid accidental T2 overload
349 slacapra 1.297 else:
350 ewv 1.321 if edmOutput:
351     missedFiles = []
352     for outputFile in edmOutput:
353     if outputFile not in self.output_file:
354     missedFiles.append(outputFile)
355     if missedFiles:
356     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
357     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
358     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
359     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
360     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
361     common.logger.info(msg)
362 spiga 1.322 else :
363 ewv 1.321 raise CrabException(msg)
364 ewv 1.301
365     if (PsetEdit.getBadFilesSetting()):
366     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
367 spiga 1.304 common.logger.info(msg)
368 ewv 1.301
369 slacapra 1.297 except CrabException, msg:
370 spiga 1.304 common.logger.info(str(msg))
371 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
372 spiga 1.293 raise CrabException(msg)
373    
374 gutsche 1.3
375 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
376    
377 slacapra 1.86 import DataDiscovery
378     import DataLocation
379 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
380 gutsche 1.3
381     datasetPath=self.datasetPath
382    
383 slacapra 1.1 ## Contact the DBS
384 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
385 slacapra 1.1 try:
386 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
387 slacapra 1.1 self.pubdata.fetchDBSInfo()
388    
389 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
390 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
391     raise CrabException(msg)
392 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
393 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
394     raise CrabException(msg)
395 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
396 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
397 slacapra 1.1 raise CrabException(msg)
398    
399 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
400 slacapra 1.270 #print self.filesbyblock
401 spiga 1.269 self.conf['pubdata']=self.pubdata
402 gutsche 1.3
403 slacapra 1.1 ## get max number of events
404 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
405 slacapra 1.1
406     ## Contact the DLS and build a list of sites hosting the fileblocks
407     try:
408 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
409 gutsche 1.6 dataloc.fetchDLSInfo()
410 slacapra 1.263
411 slacapra 1.41 except DataLocation.DataLocationError , ex:
412 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
413     raise CrabException(msg)
414 ewv 1.131
415 slacapra 1.1
416 slacapra 1.270 unsorted_sites = dataloc.getSites()
417     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
418     for lfn in self.filesbyblock.keys():
419     if unsorted_sites.has_key(lfn):
420     sites[lfn]=unsorted_sites[lfn]
421     else:
422     sites[lfn]=[]
423    
424 slacapra 1.264 if len(sites)==0:
425 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
426     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
427     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
428 slacapra 1.264 raise CrabException(msg)
429    
430 gutsche 1.35 allSites = []
431     listSites = sites.values()
432 slacapra 1.63 for listSite in listSites:
433     for oneSite in listSite:
434 gutsche 1.35 allSites.append(oneSite)
435 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
436 ewv 1.295
437 gutsche 1.3
438 gutsche 1.92 # screen output
439 spiga 1.352.4.2 if self.ads or self.lumiMask:
440     common.logger.info("Requested (A)DS %s has %s block(s)." %
441 ewv 1.350 (datasetPath, len(self.filesbyblock.keys())))
442     else:
443     common.logger.info("Requested dataset: " + datasetPath + \
444     " has " + str(self.maxEvents) + " events in " + \
445     str(len(self.filesbyblock.keys())) + " blocks.\n")
446 gutsche 1.92
447 gutsche 1.35 return sites
448 ewv 1.131
449 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Record the arguments and destination of every job produced by the
    splitter (self.dict) and store them in the CRAB DB.

    jobParams  -- ignored: the arguments come from self.dict['args']
                  (parameter kept for interface compatibility)
    firstJobID -- offset added to the 0-based job index; jobs are
                  numbered from firstJobID+1

    Jobs having arguments also get an entry in the arguments XML file,
    which is created if it does not exist yet.

    Raises CrabException when there are no jobs, or more than 500 jobs
    without server or local scheduler.
    """
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # Work on a copy padded to njobs entries: the splitter result must not
    # grow by njobs empty entries every time split() is called.
    jobParams = list(self.dict['args'])
    if len(jobParams) < njobs:
        jobParams.extend([""] * (njobs - len(jobParams)))

    # one mapper is enough for all jobs
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
    cms_se = CmsSEMap()

    listID=[]
    listField=[]
    listDictions=[]
    exist= os.path.exists(self.argsFile)
    for idx in range(njobs):
        jobNumber = idx + int(firstJobID) + 1
        listID.append(jobNumber)
        params = jobParams[idx]
        destination = self.jobDestination[idx]

        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            names = self.dict['params']
            for pos in range(len(params)):
                argu[names[pos]]=params[pos]
            if len(params)==1: self.NumEvents = params[0]
            listDictions.append(argu)
            # just for debug: keep the job number apart from its arguments
            str_argu += ' ' + ' '.join(params)

        job_ToSave = {}
        job_ToSave['arguments']= '%d %d'%( jobNumber, 0)
        job_ToSave['dlsDestination']= destination
        listField.append(job_ToSave)

        msg="Job  %s  Arguments:  %s\n"%(str(jobNumber),str_argu)
        msg+="\t  Destination: %s "%(str(destination))
        SEDestination = [cms_se[dest] for dest in destination]
        msg+="\t  CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)
    # write xml
    if len(listDictions):
        if not exist: self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
501 ewv 1.313
502 spiga 1.320 # def zipXMLfile(self):
503 ewv 1.313
504 spiga 1.320 # import tarfile
505     # try:
506     # tar = tarfile.open(self.tarNameWithPath, "a")
507     # tar.add(self.argsFile, os.path.basename(self.argsFile))
508     # tar.close()
509     # except IOError, exc:
510     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
511     # msg += str(exc)
512     # raise CrabException(msg)
513     # except tarfile.TarError, exc:
514     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
515     # msg += str(exc)
516     # raise CrabException(msg)
517 ewv 1.325
def CreateXML(self):
    """
    Create the arguments XML file (self.argsFile) containing only the
    root node named self.rootArgsFilename.
    """
    result = IMProvNode( self.rootArgsFilename )
    # close the file explicitly: addEntry() reads it back right afterwards
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        outfile.close()
    return
524 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add one 'Job' node per dictionary of listDictions to the arguments
    xml file (self.argsFile); the dictionary items become the node
    attributes.  The file must already exist.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname= 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    # rewrite the whole document, closing the file explicitly
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        outfile.close()
    return
540 ewv 1.131
def numberOfJobs(self):
    """
    Return the number of jobs computed by the splitter, or None when
    WMBS automation is enabled (self.automation != 0).
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
547 ewv 1.347
def getTarBall(self, exe):
    """
    Return the name (with path) of the input sandbox tarball, asking
    buildTar_ to prepare it when the file is not there yet.
    """
    tarball = common.work_space.pathForTgz()+self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
560 slacapra 1.1
561     def buildTar_(self, executable):
562    
563     # First of all declare the user Scram area
564     swArea = self.scram.getSWArea_()
565     swReleaseTop = self.scram.getReleaseTop_()
566 ewv 1.131
567 slacapra 1.1 ## check if working area is release top
568     if swReleaseTop == '' or swArea == swReleaseTop:
569 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
570 slacapra 1.1 return
571    
572 slacapra 1.61 import tarfile
573     try: # create tar ball
574 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
575 slacapra 1.61 ## First find the executable
576 slacapra 1.86 if (self.executable != ''):
577 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
578     if ( not exeWithPath ):
579     raise CrabException('User executable '+executable+' not found')
580 ewv 1.131
581 slacapra 1.61 ## then check if it's private or not
582     if exeWithPath.find(swReleaseTop) == -1:
583     # the exe is private, so we must ship
584 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
585 slacapra 1.61 path = swArea+'/'
586 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
587     if exeWithPath.find(path) >= 0 :
588     exe = string.replace(exeWithPath, path,'')
589 slacapra 1.129 tar.add(path+exe,exe)
590 corvo 1.85 else :
591     tar.add(exeWithPath,os.path.basename(executable))
592 slacapra 1.61 pass
593     else:
594     # the exe is from release, we'll find it on WN
595     pass
596 ewv 1.131
597 slacapra 1.61 ## Now get the libraries: only those in local working area
598 slacapra 1.256 tar.dereference=True
599 slacapra 1.61 libDir = 'lib'
600     lib = swArea+'/' +libDir
601 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
602 slacapra 1.61 if os.path.exists(lib):
603     tar.add(lib,libDir)
604 ewv 1.131
605 slacapra 1.61 ## Now check if module dir is present
606     moduleDir = 'module'
607     module = swArea + '/' + moduleDir
608     if os.path.isdir(module):
609     tar.add(module,moduleDir)
610 slacapra 1.256 tar.dereference=False
611 slacapra 1.61
612     ## Now check if any data dir(s) is present
613 spiga 1.179 self.dataExist = False
614 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
615 slacapra 1.206 while len(todo_list):
616     entry, name = todo_list.pop()
617 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
618 slacapra 1.206 continue
619 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
620 slacapra 1.206 entryPath = entry + '/'
621 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
622 slacapra 1.206 if name == 'data':
623     self.dataExist=True
624 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
625 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
626 slacapra 1.206 pass
627     pass
628 ewv 1.182
629 spiga 1.179 ### CMSSW ParameterSet
630     if not self.pset is None:
631     cfg_file = common.work_space.jobDir()+self.configFilename()
632 ewv 1.182 tar.add(cfg_file,self.configFilename())
633 ewv 1.313
634 spiga 1.309 try:
635     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
636     tar.add(crab_cfg_file,'crab.cfg')
637     except:
638     pass
639 fanzago 1.93
640 fanzago 1.152 ## Add ProdCommon dir to tar
641 slacapra 1.211 prodcommonDir = './'
642     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
643 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
644 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
645     'WMCore/__init__.py','WMCore/Algorithms']
646 slacapra 1.214 for file in neededStuff:
647     tar.add(prodcommonPath+file,prodcommonDir+file)
648 spiga 1.179
649     ##### ML stuff
650     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
651     path=os.environ['CRABDIR'] + '/python/'
652     for file in ML_file_list:
653     tar.add(path+file,file)
654    
655     ##### Utils
656 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
657 spiga 1.179 for file in Utils_file_list:
658     tar.add(path+file,file)
659 ewv 1.131
660 ewv 1.182 ##### AdditionalFiles
661 slacapra 1.253 tar.dereference=True
662 spiga 1.179 for file in self.additional_inbox_files:
663     tar.add(file,string.split(file,'/')[-1])
664 slacapra 1.253 tar.dereference=False
665 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
666 ewv 1.182
667 slacapra 1.61 tar.close()
668 mcinquil 1.241 except IOError, exc:
669 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
670 spiga 1.304 msg += str(exc)
671     raise CrabException(msg)
672 mcinquil 1.241 except tarfile.TarError, exc:
673 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
674 spiga 1.304 msg += str(exc)
675     raise CrabException(msg)
676 spiga 1.300
677 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
678     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
679 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
680 ewv 1.250 +'MB input sandbox limit \n'
681 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
682     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
683 farinafa 1.352.4.3 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabServerForUsers#Server_available_for_users'
684 spiga 1.238 raise CrabException(msg)
685 gutsche 1.72
686 slacapra 1.61 ## create tar-ball with ML stuff
687 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Return the part of the job wrapper script that prepares the
    execution environment for the job 'nj'.

    The shell fragment is produced in three steps:
      * JobType-independent setup: SCRAM_ARCH and the CMS environment
        matching the $middleware the job runs on;
      * CMSSW setup: 'project' and 'runtime -sh' through the scram
        command, then the check on the number of wrapper arguments;
      * job-specific setup: dataset variables, followed either by the
        handling of the job pset or by the AdditionalArgs/MaxEvents
        exports.
    """
    pset_target = 'pset.py'
    arch = self.executable_arch
    pieces = []
    emit = pieces.append

    # --- JobType-independent part -------------------------------------
    emit('\n#Written by cms_cmssw::wsSetupEnvironment\n')
    emit('echo ">>> setup environment"\n')
    emit('echo "set SCRAM ARCH to ' + arch + '"\n')
    emit('export SCRAM_ARCH=' + arch + '\n')
    emit('echo "SCRAM_ARCH = $SCRAM_ARCH"\n')
    emit('if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n')
    emit(self.wsSetupCMSLCGEnvironment_())

    # OSG jobs get a private working directory before the CMS setup
    emit('elif [ $middleware == OSG ]; then\n')
    for osg_line in (
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
    ):
        emit(osg_line)
    emit(self.wsSetupCMSOSGEnvironment_())

    # SGE, ARC and PBS all reuse the LCG-style setup
    for middleware in ('SGE', 'ARC', 'PBS'):
        emit('elif [ $middleware == ' + middleware + ' ]; then\n')
        emit(self.wsSetupCMSLCGEnvironment_())
    emit('fi\n')

    # --- JobType-specific part ----------------------------------------
    scram_cmd = self.scram.commandName()
    release = self.version
    emit('\n\n')
    emit('echo ">>> specific cmssw setup environment:"\n')
    emit('echo "CMSSW_VERSION = ' + release + '"\n')
    emit(scram_cmd + ' project CMSSW ' + release + '\n')
    emit('status=$?\n')
    emit('if [ $status != 0 ] ; then\n')
    emit(' echo "ERROR ==> CMSSW ' + release + ' not found on `hostname`" \n')
    emit(' job_exit_code=10034\n')
    emit(' func_exit\n')
    emit('fi \n')
    emit('cd ' + release + '\n')
    emit('SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n')
    emit('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    emit('eval `' + scram_cmd + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n')
    emit('if [ $? != 0 ] ; then\n')
    emit(' echo "ERROR ==> Problem with the command: "\n')
    emit(' echo "eval \\`' + scram_cmd + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n')
    emit(' job_exit_code=10034\n')
    emit(' func_exit\n')
    emit('fi \n')

    # --- wrapper arguments --------------------------------------------
    emit("\n")
    emit("## number of arguments (first argument always jobnumber, the second is the resubmission number)\n")
    emit("\n")
    emit("if [ $nargs -lt " + str(self.argsList) + " ]\n")
    emit("then\n")
    emit(" echo 'ERROR ==> Too few arguments' +$nargs+ \n")
    emit(' job_exit_code=50113\n')
    emit(" func_exit\n")
    emit("fi\n")
    emit("\n")

    # --- job-specific part --------------------------------------------
    job = common.job_list[nj]
    if self.datasetPath:
        emit('\n')
        emit('DatasetPath=' + self.datasetPath + '\n')
        emit('PrimaryDataset=' + self.primaryDataset + '\n')
        emit('DataTier=' + self.dataTier + '\n')
        emit('ApplicationFamily=cmsRun\n')
    else:
        emit('DatasetPath=MCDataTier\n')
        emit('PrimaryDataset=null\n')
        emit('DataTier=null\n')
        emit('ApplicationFamily=MCDataTier\n')

    if self.pset is None:
        # NOTE(review): the listing this was rebuilt from had lost its
        # indentation; both exports are assumed to belong to this
        # no-pset branch -- confirm against the repository.
        emit('\n')
        if self.AdditionalArgs:
            emit('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            emit('export MaxEvents=%s\n' % str(self.NumEvents))
    else:
        # copy the per-job configuration and give it the fixed pset name
        job_pset = os.path.basename(job.configFilename())
        emit('\n')
        emit('cp $RUNTIME_AREA/' + job_pset + ' .\n')
        emit('PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n')
        emit('IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n')
        emit('echo "PreserveSeeds: <$PreserveSeeds>"\n')
        emit('echo "IncrementSeeds:<$IncrementSeeds>"\n')
        emit('mv -f ' + job_pset + ' ' + pset_target + '\n')

    return ''.join(pieces)
797 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the wrapper-script commands that unpack the user tarball.

    When the tarball self.tgzNameWithPath exists on the submitting
    side, the fragment untars it from $RUNTIME_AREA (verbosely, plus a
    directory listing, if self.debug_wrapper is 1), checks the untar
    exit status and adds $RUNTIME_AREA to PYTHONPATH.  Without a
    tarball only the header comment is returned.  'nj' is not used.
    """
    header = '\n#Written by cms_cmssw::wsUntarSoftware\n'
    if not os.path.isfile(self.tgzNameWithPath):
        return header

    tarball = '$RUNTIME_AREA/' + os.path.basename(self.tgzNameWithPath)
    lines = [header, 'echo ">>> tar xzf ' + tarball + ' :" \n']

    if self.debug_wrapper == 1:
        lines.append('tar zxvf ' + tarball + '\n')
        lines.append('ls -Al \n')
    else:
        lines.append('tar zxf ' + tarball + '\n')

    lines.extend([
        'untar_status=$? \n',
        'if [ $untar_status -ne 0 ]; then \n',
        ' echo "ERROR ==> Untarring .tgz file failed"\n',
        ' job_exit_code=$untar_status\n',
        ' func_exit\n',
        'else \n',
        ' echo "Successful untar" \n',
        'fi \n',
        '\n',
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])
    return ''.join(lines)
834 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the wrapper-script commands that move the unpacked user
    software into the CMSSW area.

    lib/ and module/ (and src/ when self.dataExist is True) are replaced
    by the copies found in $RUNTIME_AREA, every additional inbox file is
    moved to the current directory and $RUNTIME_AREA is added to
    PYTHONPATH.  If a pset is configured the PSETHASH variable is
    computed with edmConfigHash, falling back to 'null' when the
    command prints nothing.  'nj' is not used.
    """
    pset_file = 'pset.py'
    script = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    if self.dataExist == True:
        script.append('rm -r src/ \n')
        script.append('mv $RUNTIME_AREA/src/ . \n')

    for inbox_file in self.additional_inbox_files:
        script.append('mv $RUNTIME_AREA/' + os.path.basename(inbox_file) + ' . \n')

    script.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset is None:
        return ''.join(script)

    script.append('\n')
    if self.debug_wrapper == 1:
        # NOTE(review): the extent of this debug-only section was
        # reconstructed from an indentation-less listing -- confirm
        # that the 'which edmConfigHash' banner belongs here.
        script.extend([
            'echo "***** cat ' + pset_file + ' *********"\n',
            'cat ' + pset_file + '\n',
            'echo "****** end ' + pset_file + ' ********"\n',
            '\n',
            'echo "***********************" \n',
            'which edmConfigHash \n',
            'echo "***********************" \n',
        ])
    script.extend([
        'edmConfigHash ' + pset_file + ' \n',
        'PSETHASH=`edmConfigHash ' + pset_file + '` \n',
        'echo "PSETHASH = $PSETHASH" \n',
        # temporary fix for noEdm files: never leave PSETHASH empty
        'if [ -z "$PSETHASH" ]; then \n',
        ' export PSETHASH=null\n',
        'fi \n',
        '\n',
    ])
    return ''.join(script)
887 slacapra 1.1
888 ewv 1.131
def executableName(self):
    """
    Return the command the wrapper has to start: the configured
    executable, or "sh " when a user script (self.scriptExe) is set.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
894 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the command returned by
    executableName(): the user script followed by the job number and
    the additional arguments, or the job-report/pset options otherwise.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
900 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL input sandbox.

    The user tarball and the arguments file are listed only if they
    exist on disk; the job script in the job directory is always
    listed last.  'nj' is not used.
    """
    optional_files = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional_files if os.path.isfile(path)]
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
912    
def outputSandbox(self, nj):
    """
    Return the list of file names to be put in the JDL output sandbox.

    Every user-declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile together
    with the job number nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
924    
925    
def wsRenameOutput(self, nj):
    """
    Return the part of the job script which renames the produced files.

    For each file in self.output_file the script moves it to the name
    built by numberFile(..., '$OutUniqueID') and leaves a symbolic link
    under the original name in $RUNTIME_AREA; with self.copy_data == 1
    the renamed file stays in the current directory instead of being
    moved to $RUNTIME_AREA.  A missing file sets job_exit_code=60302.
    The script finally defines file_list and changes directory to
    $RUNTIME_AREA.  'nj' is not used.
    """
    where_am_i = (
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
    )

    out = ['\n#Written by cms_cmssw::wsRenameOutput\n']
    out.extend(where_am_i)
    if self.debug_wrapper == 1:
        out.append('ls -Al\n')
    out.append('\n')

    for produced in self.output_file:
        numbered = numberFile(produced, '$OutUniqueID')
        out.append('\n')
        out.append('# check output file\n')
        out.append('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            out.append(' mv ' + produced + ' ' + numbered + '\n')
            out.append(' ln -s `pwd`/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            out.append(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
            out.append(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        out.append('else\n')
        out.append(' job_exit_code=60302\n')
        out.append(' echo "WARNING: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            # on OSG a placeholder file is written in place of the missing output
            out.append(' if [ $middleware == OSG ]; then \n')
            out.append(' echo "prepare dummy output file"\n')
            out.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n')
            out.append(' fi \n')
        out.append('fi\n')

    numbered_paths = [numberFile('$SOFTWARE_DIR/' + produced, '$OutUniqueID')
                      for produced in self.output_file]
    out.append('file_list="' + ','.join(numbered_paths) + '"\n')
    out.append('\n')
    out.extend(where_am_i)
    if self.debug_wrapper == 1:
        out.append('ls -Al\n')
    out.append('\n')
    out.append('cd $RUNTIME_AREA\n')
    out.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(out)
972    
def getRequirements(self, nj=None):
    """
    Return the job requirements expression to add to the jdl files.

    The expression is the ' && ' conjunction of:
      * a software-tag Member(...) clause for self.version, if set;
      * a software-tag Member(...) clause for self.executable_arch, if set;
      * the outbound-connectivity clause (always present);
      * for the "glite" scheduler only, a clause on the CE state, which
        also accepts "Special" CEs when GRID.use_cream is set in
        cfg_params.

    Clauses are collected and joined so that the expression never
    starts with a dangling ' && ' when self.version is empty.  'nj' is
    accepted for interface compatibility and is not used.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    clauses = []
    for software_tag in (self.version, self.executable_arch):
        if software_tag:
            clauses.append('Member("VO-cms-' + software_tag + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() == "glite":
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept so the expression is unchanged for existing setups
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
996 gutsche 1.3
def configFilename(self):
    """ Return the config file name: the job type name plus '.py'. """
    base_name = self.name()
    return base_name + '.py'
1000 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS
    environment on OSG: it exports SCRAM_ARCH and sources
    $OSG_APP/cmssoft/cms/cmsset_default.sh for the configured CMSSW
    version, leaving with job_exit_code=10020 if that file is missing.
    """
    arch = self.executable_arch
    shell_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(shell_lines)
1024 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the part of the job script which prepares the CMS
    environment on LCG-like middleware: it exports SCRAM_ARCH and
    BUILD_ARCH and sources $VO_CMS_SW_DIR/cmsset_default.sh.  The script
    leaves with job_exit_code 10031 (no software dir), 10020 (setup
    file missing or empty) or 10032 (sourcing failed).
    """
    arch = self.executable_arch
    shell_lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(shell_lines)
1058 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the part of the script that modifies the FrameworkJob Report.

    Nothing is emitted unless self.copy_data is 1.  Otherwise the
    script picks FOR_LFN from the stage-out exit status, runs
    ModifyJobReport.py on crab_fjr_$NJob.xml and, on success, replaces
    the report with NewFrameworkJobReport.xml; a failure sets
    job_exit_code=70500.  With self.publish_data == 1 the processed
    dataset is passed to the command as well.  'nj' is not used.

    NOTE(review): rebuilt from an indentation-less listing; the whole
    body is assumed to be guarded by the copy_data test -- confirm.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ] || [ $StageOutExitStatus -eq 60308 ] ; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "SE = $SE"\n',
        # SE_PATH is taken from the stage-out endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    cmd_args = ('fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $OutUniqueID'
                ' for_lfn $FOR_LFN PrimaryDataset $PrimaryDataset'
                ' ApplicationFamily $ApplicationFamily ApplicationName $executable'
                ' cmssw_version $CMSSW_VERSION psethash $PSETHASH'
                ' se_name $SE se_path $SE_PATH file_list $file_list')
    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        cmd_args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modifier + ' ' + str(cmd_args) + '"\n',
        modifier + ' ' + str(cmd_args) + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1104 ewv 1.283
def wsParseFJR(self):
    """
    Return the part of the script that parses the FrameworkJobReport
    to obtain the exit status of the executable.

    The script runs parseCrabFjr.py on crab_fjr_$NJob.xml (first for the
    dashboard report, then with --exitcode), stores the result in
    executable_exit_status and leaves through func_exit when that status
    is neither 0 nor one of 50115, 50117, 30001.  The parser output is
    echoed only if self.debug_wrapper is 1.
    """
    fjr = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    parser = '$RUNTIME_AREA/parseCrabFjr.py'

    script = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s ' + fjr + ' ]; then\n',
        ' if [ -s ' + parser + ' ]; then\n',
        ' cmd_out=`python ' + parser + ' --input ' + fjr + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        script.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    script.extend([
        ' executable_exit_status=`python ' + parser + ' --input ' + fjr + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat ' + fjr + '\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # The comparison of the processed files against the input files
        # used to be emitted here; it was already disabled (kept only
        # as an unused string) because it cannot work any more with the
        # current job arguments.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])
    return ''.join(script)
1177    
def setParam_(self, param, value):
    """ Store 'value' under the key 'param' in the job-type parameters. """
    params = self._params
    params[param] = value
1180    
def getParams(self):
    """ Return the dictionary of job-type parameters (not a copy). """
    params = self._params
    return params
1183 gutsche 1.8
def outList(self,list=False):
    """
    Return the script fragment that exports 'filesToCheck', the list of
    files expected in the output sandbox, or self.output_file itself
    when 'list' is true.

    The expected files are the numbered output files (only if
    self.return_data is 1), the numbered sandbox files and the job
    stdout/stderr.  A message is logged when no output file is defined,
    and the task is always updated with the output file base names.

    NOTE(review): rebuilt from an indentation-less listing; the sandbox
    loop is assumed not to depend on return_data -- confirm.
    """
    if len(self.output_file) == 0:
        msg ="WARNING: no output files name have been defined!!\n"
        msg+="\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    expected = []
    if self.return_data == 1:
        expected.extend([numberFile(name, '$OutUniqueID') for name in self.output_file])
    expected.extend([numberFile(name, '$NJob') for name in self.output_file_sandbox])
    expected.append('CMSSW_$NJob.stdout')
    expected.append('CMSSW_$NJob.stderr')
    joined = ' '.join(expected)

    txt = ''.join([
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: ' + joined + '"\n',
        'filesToCheck="' + joined + '"\n',
        'export filesToCheck\n',
    ])

    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return txt