ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.349.2.1
Committed: Thu Jan 21 16:13:28 2010 UTC (15 years, 3 months ago) by ewv
Content type: text/x-python
Branch: DBSMART
Changes since 1.349: +17 -7 lines
Log Message:
Branch for DBSMART, probably never will be used

File Contents

# User Rev Content
1 ewv 1.327
2 slacapra 1.349 __revision__ = "$Id: cms_cmssw.py,v 1.348 2010/01/05 15:40:57 farinafa Exp $"
3     __version__ = "$Revision: 1.348 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
    """
    Set up the CMSSW job type from the user configuration.

    cfg_params  : configuration mapping, queried with 'SECTION.key' names
    ncjobs      : number of jobs requested to be created (kept as self.ncjobs)
    skip_blocks : kept as self.skip_blocks and handed to DataDiscovery
    isNew       : when true, the PSet is modified and the input sandbox
                  tarball is prepared

    Raises CrabException when the CMSSW version cannot be parsed or is
    older than 2_1, when datasetpath, pset, script_exe or an additional
    input file is missing or invalid, or when publication is requested
    without USER.publish_data_name.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 2
    self.NumEvents=0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    ### server mode and local schedulers get an effectively unlimited size
    self.server = self.cfg_params.get('CRAB.server_name',None) or \
                  self.cfg_params.get('CRAB.use_server',0)
    self.local = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1,"CMSSW version is: "+str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # too few '_'-separated fields, or a field that is not an integer
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)

    if tmp =='':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower()=='none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        ll = len(pathFields)
        # 1 field: bare name, 4 fields: /a/b/c, 5 fields: /a/b/c/d
        if ll not in [1,4,5]:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        if ll > 1:
            self.primaryDataset = pathFields[1]
            self.dataTier = pathFields[2]
        else:
            self.primaryDataset = 'Unknown'
            self.dataTier = 'Unknown'

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        nFields = len(self.datasetPath.split("/"))
        self.ads = nFields > 4 or nFields == 1

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap=''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
    if self.debug_wrapper == 1: self.debugWrap='--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable','cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
    if self.pset.lower() != 'none' :
        if (not os.path.exists(self.pset)):
            raise CrabException("User defined PSet file "+self.pset+" does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file',None)
    if tmp :
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe',None)
    if self.scriptExe :
        if not os.path.isfile(self.scriptExe):
            msg ="ERROR. file "+self.scriptExe+" not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
    if self.AdditionalArgs : self.AdditionalArgs = self.AdditionalArgs.replace(',',' ')

    # scriptExe is None (not '') when USER.script_exe is absent, so test
    # for any empty value; comparing against '' never matched.
    if self.datasetPath is None and self.pset is None and not self.scriptExe :
        msg ="Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for tmp in tmpAddFiles:
            tmp = tmp.strip()
            if not tmp:
                # empty entry (e.g. trailing comma): nothing to add
                continue
            dirname = ''
            if not tmp[0]=="/": dirname = "."
            files = []
            if "*" in tmp:
                # wildcard: expand, and require at least one match
                files = glob.glob(os.path.join(dirname, tmp))
                if len(files)==0:
                    raise CrabException("No additional input file found with this pattern: "+tmp)
            else:
                files.append(tmp)
            for addFile in files:
                if not os.path.exists(addFile):
                    raise CrabException("Additional input file not found: "+addFile)
                self.additional_inbox_files.append(addFile.strip())
        common.logger.debug("Additional input files: "+str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # Surrounding blanks are stripped from each comma-separated name.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        self.preserveSeeds = [seed.strip() for seed in cfg_params['CMSSW.preserve_seeds'].split(',')]
    if cfg_params.has_key('CMSSW.increment_seeds'):
        self.incrementSeeds = [seed.strip() for seed in cfg_params['CMSSW.increment_seeds'].split(',')]

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data',0))
    self.return_data = int(cfg_params.get('USER.return_data',0))
    self.publish_data = int(cfg_params.get('USER.publish_data',0))
    if (self.publish_data == 1):
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']
            # A check of the publication name length (limit derived from
            # 450 minus the lengths of the user name, the primary dataset
            # and the longest output file name) used to live here; it was
            # already disabled and is available in the revision history.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination=[]  # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    #wmbs
    self.automation = int(self.cfg_params.get('WMBS.automation',0))
    if self.automation == 0:
        if self.datasetPath:
            blockSites = self.DataDiscoveryAndLocation(cfg_params)
        #DBSDLS-end
        self.conf['blockSites']=blockSites

        ## Select Splitting
        splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))

        if self.selectNoInput:
            if self.pset is None:
                self.algo = 'ForScript'
            else:
                self.algo = 'NoInput'
                self.conf['managedGenerators']=self.managedGenerators
                self.conf['generator']=self.generator
        elif self.ads:
            self.algo = 'LumiBased'
        elif splitByRun ==1:
            self.algo = 'RunBased'
        else:
            self.algo = 'EventBased'
        common.logger.debug("Job splitting method: %s" % self.algo)

        splitter = JobSplitter(self.cfg_params,self.conf)
        self.dict = splitter.Algos()[self.algo]()

    self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
    self.rootArgsFilename= 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None: self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
307 spiga 1.293
308    
309     def ModifyPset(self):
310     import PsetManipulator as pp
311 ewv 1.335
312     # If pycfg_params set, fake out the config script
313     # to make it think it was called with those args
314     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
315     if pycfg_params:
316     trueArgv = sys.argv
317     sys.argv = [self.pset]
318     sys.argv.extend(pycfg_params.split(' '))
319 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
320 ewv 1.335 if pycfg_params: # Restore original sys.argv
321     sys.argv = trueArgv
322    
323 spiga 1.293 try:
324     # Add FrameworkJobReport to parameter-set, set max events.
325     # Reset later for data jobs by writeCFG which does all modifications
326 ewv 1.295 PsetEdit.maxEvent(1)
327 spiga 1.293 PsetEdit.skipEvent(0)
328     PsetEdit.psetWriter(self.configFilename())
329     ## If present, add TFileService to output files
330 ewv 1.349.2.1 if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
331 spiga 1.293 tfsOutput = PsetEdit.getTFileService()
332     if tfsOutput:
333     if tfsOutput in self.output_file:
334 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
335 spiga 1.293 else:
336     outfileflag = True #output found
337     self.output_file.append(tfsOutput)
338 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
339 spiga 1.293 pass
340     pass
341 ewv 1.321 # If present and requested, add PoolOutputModule to output files
342 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
343 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
344     if edmOutput:
345 ewv 1.321 for outputFile in edmOutput:
346     if outputFile in self.output_file:
347 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
348 ewv 1.321 else:
349     self.output_file.append(outputFile)
350     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
351     # not requested, check anyhow to avoid accidental T2 overload
352 slacapra 1.297 else:
353 ewv 1.321 if edmOutput:
354     missedFiles = []
355     for outputFile in edmOutput:
356     if outputFile not in self.output_file:
357     missedFiles.append(outputFile)
358     if missedFiles:
359     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
360     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
361     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
362     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
363     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
364     common.logger.info(msg)
365 spiga 1.322 else :
366 ewv 1.321 raise CrabException(msg)
367 ewv 1.301
368     if (PsetEdit.getBadFilesSetting()):
369     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
370 spiga 1.304 common.logger.info(msg)
371 ewv 1.301
372 slacapra 1.297 except CrabException, msg:
373 spiga 1.304 common.logger.info(str(msg))
374 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
375 spiga 1.293 raise CrabException(msg)
376    
377 gutsche 1.3
378 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
379    
380 slacapra 1.86 import DataDiscovery
381     import DataLocation
382 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
383 gutsche 1.3
384     datasetPath=self.datasetPath
385    
386 slacapra 1.1 ## Contact the DBS
387 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
388 slacapra 1.1 try:
389 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
390 slacapra 1.1 self.pubdata.fetchDBSInfo()
391    
392 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
393 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
394     raise CrabException(msg)
395 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
396 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
397     raise CrabException(msg)
398 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
399 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
400 slacapra 1.1 raise CrabException(msg)
401    
402 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
403 slacapra 1.270 #print self.filesbyblock
404 spiga 1.269 self.conf['pubdata']=self.pubdata
405 gutsche 1.3
406 slacapra 1.1 ## get max number of events
407 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
408 slacapra 1.1
409     ## Contact the DLS and build a list of sites hosting the fileblocks
410     try:
411 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
412 gutsche 1.6 dataloc.fetchDLSInfo()
413 slacapra 1.263
414 slacapra 1.41 except DataLocation.DataLocationError , ex:
415 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
416     raise CrabException(msg)
417 ewv 1.131
418 slacapra 1.1
419 slacapra 1.270 unsorted_sites = dataloc.getSites()
420     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
421     for lfn in self.filesbyblock.keys():
422     if unsorted_sites.has_key(lfn):
423     sites[lfn]=unsorted_sites[lfn]
424     else:
425     sites[lfn]=[]
426    
427 slacapra 1.264 if len(sites)==0:
428 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
429     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
430     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
431 slacapra 1.264 raise CrabException(msg)
432    
433 gutsche 1.35 allSites = []
434     listSites = sites.values()
435 slacapra 1.63 for listSite in listSites:
436     for oneSite in listSite:
437 gutsche 1.35 allSites.append(oneSite)
438 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
439 ewv 1.295
440 gutsche 1.3
441 gutsche 1.92 # screen output
442 ewv 1.349.2.1 if self.ads:
443     common.logger.info("Requested ADS %s has %s block(s)." %
444     (datasetPath, len(self.filesbyblock.keys())))
445     else:
446     common.logger.info("Requested dataset: " + datasetPath + \
447     " has " + str(self.maxEvents) + " events in " + \
448     str(len(self.filesbyblock.keys())) + " blocks.\n")
449 gutsche 1.92
450 gutsche 1.35 return sites
451 ewv 1.131
452 spiga 1.42
def split(self, jobParams,firstJobID):
    """
    Register the jobs computed by the splitter in the CRAB database and
    in the arguments XML file.

    jobParams  : ignored; the arguments are taken from self.dict['args']
    firstJobID : offset added to the 0-based job index; job numbers
                 stored are offset + index + 1

    Raises CrabException when the splitter produced no jobs, or more than
    500 jobs while neither server mode nor a local scheduler is in use.
    """
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap

    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    for i in range(njobs):
        jobParams.append("")

    listID=[]
    listField=[]
    listDictions=[]
    exist= os.path.exists(self.argsFile)
    # One mapper serves every job; it does not depend on the loop index
    cms_se = CmsSEMap()
    firstJob = int(firstJobID)
    for index in range(njobs):
        job = index + firstJob
        params = jobParams[index]
        listID.append(job+1)

        # just for debug: job number followed by its arguments
        str_argu = str(job+1)
        if len(params):
            argu = {'JobID': job+1}
            for i in range(len(params)):
                argu[self.dict['params'][i]]=params[i]
            if len(params)==1: self.NumEvents = params[0]
            str_argu += ' ' + ' '.join(params)
            listDictions.append(argu)

        job_ToSave ={}
        job_ToSave['arguments']= '%d %d'%( (job+1), 0)
        job_ToSave['dlsDestination']= self.jobDestination[index]
        listField.append(job_ToSave)

        msg="Job %s Arguments: %s\n"%(str(job+1),str_argu)
        msg+="\t Destination: %s "%(str(self.jobDestination[index]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[index]]
        msg+="\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1,msg)
    # write xml
    if len(listDictions):
        if not exist: self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID,listField)
    return
504 ewv 1.313
505 spiga 1.320 # def zipXMLfile(self):
506 ewv 1.313
507 spiga 1.320 # import tarfile
508     # try:
509     # tar = tarfile.open(self.tarNameWithPath, "a")
510     # tar.add(self.argsFile, os.path.basename(self.argsFile))
511     # tar.close()
512     # except IOError, exc:
513     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
514     # msg += str(exc)
515     # raise CrabException(msg)
516     # except tarfile.TarError, exc:
517     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
518     # msg += str(exc)
519     # raise CrabException(msg)
520 ewv 1.325
def CreateXML(self):
    """
    Create the arguments file self.argsFile containing only an empty
    root node named self.rootArgsFilename. An existing file is overwritten.
    """
    result = IMProvNode( self.rootArgsFilename )
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed before it is re-read
        outfile.close()
    return
527 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    add one 'Job' node per dictionary in listDictions to the xml file
    self.argsFile; the dictionary items become the node attributes.
    The file is re-written in full.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname= 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname , None, **dictions)
        improvDoc.addNode(report)
    outfile = open( self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # close explicitly so the file handle is not left to the garbage collector
        outfile.close()
    return
543 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of self.dict when self.automation is 0,
    and None otherwise.
    """
    #wmbs
    if self.automation != 0:
        return None
    return self.dict['njobs']
550 ewv 1.347
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball, building it with
    buildTar_(exe) when no file exists at that path yet.

    The path is recomputed from the work space and stored in
    self.tgzNameWithPath on every call.
    """
    tarballPath = common.work_space.pathForTgz()+self.tgz_name
    self.tgzNameWithPath = tarballPath

    if not os.path.exists(tarballPath):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return string.strip(self.tgzNameWithPath)

    # already built: hand back the path untouched
    return self.tgzNameWithPath
563 slacapra 1.1
564     def buildTar_(self, executable):
565    
566     # First of all declare the user Scram area
567     swArea = self.scram.getSWArea_()
568     swReleaseTop = self.scram.getReleaseTop_()
569 ewv 1.131
570 slacapra 1.1 ## check if working area is release top
571     if swReleaseTop == '' or swArea == swReleaseTop:
572 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
573 slacapra 1.1 return
574    
575 slacapra 1.61 import tarfile
576     try: # create tar ball
577 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
578 slacapra 1.61 ## First find the executable
579 slacapra 1.86 if (self.executable != ''):
580 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
581     if ( not exeWithPath ):
582     raise CrabException('User executable '+executable+' not found')
583 ewv 1.131
584 slacapra 1.61 ## then check if it's private or not
585     if exeWithPath.find(swReleaseTop) == -1:
586     # the exe is private, so we must ship
587 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
588 slacapra 1.61 path = swArea+'/'
589 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
590     if exeWithPath.find(path) >= 0 :
591     exe = string.replace(exeWithPath, path,'')
592 slacapra 1.129 tar.add(path+exe,exe)
593 corvo 1.85 else :
594     tar.add(exeWithPath,os.path.basename(executable))
595 slacapra 1.61 pass
596     else:
597     # the exe is from release, we'll find it on WN
598     pass
599 ewv 1.131
600 slacapra 1.61 ## Now get the libraries: only those in local working area
601 slacapra 1.256 tar.dereference=True
602 slacapra 1.61 libDir = 'lib'
603     lib = swArea+'/' +libDir
604 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
605 slacapra 1.61 if os.path.exists(lib):
606     tar.add(lib,libDir)
607 ewv 1.131
608 slacapra 1.61 ## Now check if module dir is present
609     moduleDir = 'module'
610     module = swArea + '/' + moduleDir
611     if os.path.isdir(module):
612     tar.add(module,moduleDir)
613 slacapra 1.256 tar.dereference=False
614 slacapra 1.61
615     ## Now check if any data dir(s) is present
616 spiga 1.179 self.dataExist = False
617 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
618 slacapra 1.206 while len(todo_list):
619     entry, name = todo_list.pop()
620 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
621 slacapra 1.206 continue
622 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
623 slacapra 1.206 entryPath = entry + '/'
624 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
625 slacapra 1.206 if name == 'data':
626     self.dataExist=True
627 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
628 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
629 slacapra 1.206 pass
630     pass
631 ewv 1.182
632 spiga 1.179 ### CMSSW ParameterSet
633     if not self.pset is None:
634     cfg_file = common.work_space.jobDir()+self.configFilename()
635 ewv 1.182 tar.add(cfg_file,self.configFilename())
636 ewv 1.313
637 spiga 1.309 try:
638     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
639     tar.add(crab_cfg_file,'crab.cfg')
640     except:
641     pass
642 fanzago 1.93
643 fanzago 1.152 ## Add ProdCommon dir to tar
644 slacapra 1.211 prodcommonDir = './'
645     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
646 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
647 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
648     'WMCore/__init__.py','WMCore/Algorithms']
649 slacapra 1.214 for file in neededStuff:
650     tar.add(prodcommonPath+file,prodcommonDir+file)
651 spiga 1.179
652     ##### ML stuff
653     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
654     path=os.environ['CRABDIR'] + '/python/'
655     for file in ML_file_list:
656     tar.add(path+file,file)
657    
658     ##### Utils
659 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
660 spiga 1.179 for file in Utils_file_list:
661     tar.add(path+file,file)
662 ewv 1.131
663 ewv 1.182 ##### AdditionalFiles
664 slacapra 1.253 tar.dereference=True
665 spiga 1.179 for file in self.additional_inbox_files:
666     tar.add(file,string.split(file,'/')[-1])
667 slacapra 1.253 tar.dereference=False
668 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
669 ewv 1.182
670 slacapra 1.61 tar.close()
671 mcinquil 1.241 except IOError, exc:
672 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
673 spiga 1.304 msg += str(exc)
674     raise CrabException(msg)
675 mcinquil 1.241 except tarfile.TarError, exc:
676 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
677 spiga 1.304 msg += str(exc)
678     raise CrabException(msg)
679 spiga 1.300
680 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
681     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
682 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
683 ewv 1.250 +'MB input sandbox limit \n'
684 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
685     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
686 mcinquil 1.336 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServerForUsers#Server_available_for_users'
687 spiga 1.238 raise CrabException(msg)
688 gutsche 1.72
689 slacapra 1.61 ## create tar-ball with ML stuff
690 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Returns the part of the job script which prepares the execution
    environment for the job 'nj'.

    The generated shell text, in order: exports SCRAM_ARCH, runs the
    middleware specific setup, creates the CMSSW project area with
    scram, checks the number of wrapper arguments, defines the dataset
    related variables and finally either installs the job parameter
    set as 'pset.py' or exports the script-only variables.
    """
    psetName = 'pset.py'
    arch = self.executable_arch

    pieces = []
    emit = pieces.append

    # Prepare JobType-independent part
    emit('\n#Written by cms_cmssw::wsSetupEnvironment\n')
    emit('echo ">>> setup environment"\n')
    emit('echo "set SCRAM ARCH to ' + arch + '"\n')
    emit('export SCRAM_ARCH=' + arch + '\n')
    emit('echo "SCRAM_ARCH = $SCRAM_ARCH"\n')
    emit('if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n')
    emit(self.wsSetupCMSLCGEnvironment_())
    emit('elif [ $middleware == OSG ]; then\n')
    emit(' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n')
    emit(' if [ ! $? == 0 ] ;then\n')
    emit(' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n')
    emit(' job_exit_code=10016\n')
    emit(' func_exit\n')
    emit(' fi\n')
    emit(' echo ">>> Created working directory: $WORKING_DIR"\n')
    emit('\n')
    emit(' echo "Change to working directory: $WORKING_DIR"\n')
    emit(' cd $WORKING_DIR\n')
    emit(' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n')
    emit(self.wsSetupCMSOSGEnvironment_())
    # SGE, ARC and PBS all reuse the LCG style setup
    for middleware in ('SGE', 'ARC', 'PBS'):
        emit('elif [ $middleware == ' + middleware + ' ]; then\n')
        emit(self.wsSetupCMSLCGEnvironment_())
    emit('fi\n')

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    release = self.version
    emit('\n\n')
    emit('echo ">>> specific cmssw setup environment:"\n')
    emit('echo "CMSSW_VERSION = ' + release + '"\n')
    emit(scram + ' project CMSSW ' + release + '\n')
    emit('status=$?\n')
    emit('if [ $status != 0 ] ; then\n')
    emit(' echo "ERROR ==> CMSSW ' + release + ' not found on `hostname`" \n')
    emit(' job_exit_code=10034\n')
    emit(' func_exit\n')
    emit('fi \n')
    emit('cd ' + release + '\n')
    emit('SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n')
    emit('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    emit('eval `' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n')
    emit('if [ $? != 0 ] ; then\n')
    emit(' echo "ERROR ==> Problem with the command: "\n')
    emit(' echo "eval \\`' + scram + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n')
    emit(' job_exit_code=10034\n')
    emit(' func_exit\n')
    emit('fi \n')

    # Handle the arguments:
    emit("\n")
    emit("## number of arguments (first argument always jobnumber, the second is the resubmission number)\n")
    emit("\n")
    emit("if [ $nargs -lt " + str(self.argsList) + " ]\n")
    emit("then\n")
    emit(" echo 'ERROR ==> Too few arguments' +$nargs+ \n")
    emit(' job_exit_code=50113\n')
    emit(" func_exit\n")
    emit("fi\n")
    emit("\n")

    # Prepare job-specific part
    job = common.job_list[nj]
    if (self.datasetPath):
        emit('\n')
        emit('DatasetPath=' + self.datasetPath + '\n')
        emit('PrimaryDataset=' + self.primaryDataset + '\n')
        emit('DataTier=' + self.dataTier + '\n')
        emit('ApplicationFamily=cmsRun\n')
    else:
        # no input dataset: fixed placeholder values are written
        emit('DatasetPath=MCDataTier\n')
        emit('PrimaryDataset=null\n')
        emit('DataTier=null\n')
        emit('ApplicationFamily=MCDataTier\n')

    if self.pset != None:
        # copy the job configuration into the working area and rename
        # it to the fixed name used by the rest of the wrapper
        pset = os.path.basename(job.configFilename())
        emit('\n')
        emit('cp $RUNTIME_AREA/' + pset + ' .\n')
        emit('PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n')
        emit('IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n')
        emit('echo "PreserveSeeds: <$PreserveSeeds>"\n')
        emit('echo "IncrementSeeds:<$IncrementSeeds>"\n')
        emit('mv -f ' + pset + ' ' + psetName + '\n')
    else:
        emit('\n')
        if self.AdditionalArgs:
            emit('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            emit('export MaxEvents=%s\n' % str(self.NumEvents))

    return ''.join(pieces)
800 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Returns the part of the job script which unpacks the tar-ball
    shipped in the input sandbox and puts $RUNTIME_AREA in PYTHONPATH.

    'nj' is accepted for interface symmetry and is not used.  When the
    tar-ball named by self.tgzNameWithPath does not exist at script
    creation time only the header comment is returned.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
    txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
    # $? must be read right after tar: any command placed in between
    # (the debug listing below) would overwrite it and hide a failure.
    txt += 'untar_status=$? \n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
835 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Returns the part of the job script which moves the unpacked user
    area (lib/, module/, optionally src/ and the additional input
    files) from $RUNTIME_AREA into the current directory, extends
    PYTHONPATH and, when a parameter set is defined, computes
    PSETHASH with edmConfigHash.

    'nj' is not used.
    """
    script = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    if self.dataExist == True:
        script.append('rm -r src/ \n')
        script.append('mv $RUNTIME_AREA/src/ . \n')

    for extra in self.additional_inbox_files:
        script.append('mv $RUNTIME_AREA/' + os.path.basename(extra) + ' . \n')

    script.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset != None:
        psetName = 'pset.py'

        script.append('\n')
        if self.debug_wrapper == 1:
            # debug only: dump the parameter set and show which
            # edmConfigHash binary is picked up
            script.append('echo "***** cat ' + psetName + ' *********"\n')
            script.append('cat ' + psetName + '\n')
            script.append('echo "****** end ' + psetName + ' ********"\n')
            script.append('\n')
            script.append('echo "***********************" \n')
            script.append('which edmConfigHash \n')
            script.append('echo "***********************" \n')
        script.append('edmConfigHash ' + psetName + ' \n')
        script.append('PSETHASH=`edmConfigHash ' + psetName + '` \n')
        script.append('echo "PSETHASH = $PSETHASH" \n')
        # temporary fix for noEdm files: fall back to a literal "null"
        script.append('if [ -z "$PSETHASH" ]; then \n')
        script.append(' export PSETHASH=null\n')
        script.append('fi \n')
        script.append('\n')

    return ''.join(script)
888 slacapra 1.1
889 ewv 1.131
def executableName(self):
    """
    Returns the command the wrapper has to run: the string "sh "
    when a user script (self.scriptExe) is set, self.executable
    otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
895 slacapra 1.1
def executableArgs(self):
    """
    Returns the argument string passed to the executable: the user
    script followed by $NJob and $AdditionalArgs when self.scriptExe
    is set, otherwise the job report (-j) and parameter set (-p)
    options.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
901 slacapra 1.1
def inputSandbox(self, nj):
    """
    Returns the list of filenames to be put in the JDL input sandbox:
    the tar-ball and the arguments file (each only if it exists on
    disk) followed by the job script in the job directory.

    'nj' is not used.
    """
    optional = (self.tgzNameWithPath, self.argsFile)
    sandbox = [path for path in optional if os.path.isfile(path)]
    # the wrapper script is always shipped
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
913    
def outputSandbox(self, nj):
    """
    Returns the list of filenames to be put in the JDL output sandbox.

    Every user declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile() together
    with the job number 'nj + 1' as a string.
    """
    ## User Declared output files
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
925    
926    
def wsRenameOutput(self, nj):
    """
    Returns the part of the job script which renames the produced
    output files, links them under their original name in
    $RUNTIME_AREA and defines the shell variable 'file_list'.

    'nj' is not used: the generated names rely on the shell variable
    $OutUniqueID.
    """
    debug = (self.debug_wrapper == 1)

    script = [
        '\n#Written by cms_cmssw::wsRenameOutput\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
    ]
    if debug:
        script.append('ls -Al\n')
    script.append('\n')

    for produced in self.output_file:
        renamed = numberFile(produced, '$OutUniqueID')
        script.append('\n')
        script.append('# check output file\n')
        script.append('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
            script.append(' mv ' + produced + ' ' + renamed + '\n')
            script.append(' ln -s `pwd`/' + renamed + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            script.append(' mv ' + produced + ' $RUNTIME_AREA/' + renamed + '\n')
            script.append(' ln -s $RUNTIME_AREA/' + renamed + ' $RUNTIME_AREA/' + produced + '\n')
        script.append('else\n')
        script.append(' job_exit_code=60302\n')
        script.append(' echo "WARNING: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            # on OSG a placeholder file is written in place of the
            # missing output
            script.append(' if [ $middleware == OSG ]; then \n')
            script.append(' echo "prepare dummy output file"\n')
            script.append(' echo "Processing of job output failed" > $RUNTIME_AREA/' + renamed + '\n')
            script.append(' fi \n')
        script.append('fi\n')

    numbered = [numberFile('$SOFTWARE_DIR/' + produced, '$OutUniqueID')
                for produced in self.output_file]

    script.append('file_list="' + ','.join(numbered) + '"\n')
    script.append('\n')
    script.append('echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n')
    script.append('echo ">>> current directory content:"\n')
    if debug:
        script.append('ls -Al\n')
    script.append('\n')
    script.append('cd $RUNTIME_AREA\n')
    script.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(script)
973    
def getRequirements(self, nj=None):
    """
    Returns the job requirements expression to add to the jdl files.

    The expression is the ' && ' conjunction of: the software tag for
    self.version (if set), the software tag for self.executable_arch
    (if set), the outbound connectivity requirement and, for the
    glite schedulers, the CE status requirement (which also accepts
    "Special" when GRID.use_cream is configured).

    'nj' is not used; its default is None rather than a shared
    mutable list.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the clauses and join them once: a missing version or
    # architecture can no longer leave a dangling leading '&&'.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ["glitecoll", "glite", "glite_slc5"]:
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
997 gutsche 1.3
def configFilename(self):
    """ Returns the config file name: the job type name plus '.py' """
    basename = self.name()
    return basename + '.py'
1001 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of the job script which sets up the CMS software
    environment on OSG: SCRAM_ARCH is exported and
    $OSG_APP/cmssoft/cms/cmsset_default.sh is sourced with the CMSSW
    version as argument; the job exits with code 10020 if that file
    is missing.
    """
    arch = self.executable_arch
    release = self.version
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + release + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(script)
1025 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of the job script which sets up the CMS software
    environment on LCG-like sites: SCRAM_ARCH and BUILD_ARCH are
    exported and $VO_CMS_SW_DIR/cmsset_default.sh is sourced.  The
    generated script exits with 10031 (no software dir), 10020
    (setup file missing or empty) or 10032 (sourcing failed).
    """
    arch = self.executable_arch
    script = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(script)
1059 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Returns the part of the job script that modifies the
    FrameworkJob Report by running ModifyJobReport.py.

    An empty string is returned unless self.copy_data is 1.  When
    self.publish_data is 1 the processed dataset is defined in the
    script and passed on as UserProcessedDataset.  'nj' is not used.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'

    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ] || [ $StageOutExitStatus -eq 60308 ] ; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "SE = $SE"\n',
        # SE_PATH is taken from the stage-out endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    # key/value pairs handed to ModifyJobReport.py
    options = ' '.join([
        'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml',
        'n_job $OutUniqueID',
        'for_lfn $FOR_LFN',
        'PrimaryDataset $PrimaryDataset',
        'ApplicationFamily $ApplicationFamily',
        'ApplicationName $executable',
        'cmssw_version $CMSSW_VERSION',
        'psethash $PSETHASH',
        'se_name $SE',
        'se_path $SE_PATH',
        'file_list $file_list',
    ])
    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        options += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modifier + ' ' + options + '"\n',
        modifier + ' ' + options + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1105 ewv 1.283
def wsParseFJR(self):
    """
    Returns the part of the job script which parses the
    FrameworkJobReport with parseCrabFjr.py, takes the executable
    exit status from it and makes the job exit when that status is
    neither 0 nor one of 50115, 50117, 30001.

    self.debugWrap is appended to the dashboard parsing command and
    the parser output is echoed only when self.debug_wrapper is 1.
    """
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml'

    script = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        script.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    script.extend([
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        # Patch to check input data reading for CMSSW16x.  The check
        # comparing the list of processed files with the input files
        # (exit status 30001 on mismatch) is disabled: it cannot work
        # any more given the changes on the job arguments.
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])

    return ''.join(script)
1178    
def setParam_(self, param, value):
    """ Stores 'value' under the key 'param' in the parameter dictionary """
    params = self._params
    params[param] = value
1181    
def getParams(self):
    """ Returns the parameter dictionary itself (not a copy) """
    params = self._params
    return params
1184 gutsche 1.8
def outList(self,list=False):
    """
    Builds the script lines which define and export 'filesToCheck',
    the list of files expected in the output sandbox, and records the
    output file base names in the task via common._db.updateTask_().

    Returns self.output_file when 'list' is true, the script text
    otherwise.  A warning is logged when no output file is defined.
    """
    if len(self.output_file) == 0:
        msg = "WARNING: no output files name have been defined!!\n"
        msg += "\tno output files will be reported back/staged\n"
        common.logger.info(msg)

    expected = []
    if self.return_data == 1:
        # user output files come back through the sandbox only when
        # return_data is requested
        expected.extend([numberFile(name, '$OutUniqueID') for name in self.output_file])
    expected.extend([numberFile(name, '$NJob') for name in self.output_file_sandbox])
    expected.append('CMSSW_$NJob.stdout')
    expected.append('CMSSW_$NJob.stderr')

    joined = ' '.join(expected)
    txt = 'echo ">>> list of expected files on output sandbox"\n'
    txt += 'echo "output files: ' + joined + '"\n'
    txt += 'filesToCheck="' + joined + '"\n'
    txt += 'export filesToCheck\n'

    common._db.updateTask_({'outfileBasename': self.output_file})

    if list:
        return self.output_file
    return txt
1202     for file in (self.output_file_sandbox):
1203     listOutFiles.append(numberFile(file, '$NJob'))
1204     listOutFiles.append(stdout)
1205     listOutFiles.append(stderr)
1206    
1207 fanzago 1.161 txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1208 spiga 1.157 txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1209 spiga 1.169 txt += 'export filesToCheck\n'
1210 spiga 1.341 taskinfo={}
1211     taskinfo['outfileBasename'] = self.output_file
1212     common._db.updateTask_(taskinfo)
1213 ewv 1.276
1214 spiga 1.257 if list : return self.output_file
1215 ewv 1.170 return txt