ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
Revision: 1.338
Committed: Thu Sep 17 16:45:57 2009 UTC (15 years, 7 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
Changes since 1.337: +7 -2 lines
Log Message:
added a check to validate the syntax of the input dataset name, savannah bug 55539

File Contents

# User Rev Content
1 ewv 1.327
2 fanzago 1.338 __revision__ = "$Id: cms_cmssw.py,v 1.337 2009/09/16 15:49:01 fanzago Exp $"
3     __version__ = "$Revision: 1.337 $"
4 ewv 1.327
5 slacapra 1.1 from JobType import JobType
6     from crab_exceptions import *
7     from crab_util import *
8     import common
9     import Scram
10 spiga 1.269 from Splitter import JobSplitter
11 slacapra 1.1
12 spiga 1.293 from IMProv.IMProvNode import IMProvNode
13 slacapra 1.105 import os, string, glob
14 slacapra 1.1
15     class Cmssw(JobType):
def __init__(self, cfg_params, ncjobs, skip_blocks, isNew):
    """
    Configure a CMSSW job type from the user's CRAB configuration.

    Reads the relevant CRAB/CMSSW/USER/GRID keys from cfg_params,
    validates them, runs data discovery when a dataset is given, picks
    the job splitting algorithm and runs it, and (only when isNew is
    true) patches the parameter set and prepares the input sandbox.

    cfg_params  -- mapping of 'SECTION.key' -> value from crab.cfg
    ncjobs      -- number of jobs requested to be created
    skip_blocks -- handed to DataDiscovery unchanged
    isNew       -- true the first time the task is created

    Raises CrabException on any invalid or missing setting.
    """
    JobType.__init__(self, 'CMSSW')
    common.logger.debug('CMSSW::__init__')
    self.skip_blocks = skip_blocks
    self.argsList = 1
    self.NumEvents = 0
    self._params = {}
    self.cfg_params = cfg_params

    ### Temporary patch to automatically skip the ISB size check:
    # NOTE(review): if 'CRAB.use_server' arrives as the string '0' it is
    # truthy here -- confirm how cfg_params values are typed.
    self.server = self.cfg_params.get('CRAB.server_name', None) or \
                  self.cfg_params.get('CRAB.use_server', 0)
    self.local = common.scheduler.name().upper() in ['LSF', 'CAF', 'CONDOR', 'SGE']
    size = 9.5
    if self.server or self.local:
        size = 99999
    self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize', size))

    # number of jobs requested to be created, limit obj splitting
    self.ncjobs = ncjobs

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''
    self.executable = ''
    self.executable_arch = self.scram.getArch()
    self.tgz_name = 'default.tgz'
    self.scriptName = 'CMSSW.sh'
    self.pset = ''
    self.datasetPath = ''

    self.tgzNameWithPath = common.work_space.pathForTgz() + self.tgz_name
    # set FJR file name
    self.fjrFileName = 'crab_fjr.xml'

    self.version = self.scram.getSWVersion()
    common.logger.log(10-1, "CMSSW version is: " + str(self.version))
    version_array = self.version.split('_')
    self.CMSSW_major = 0
    self.CMSSW_minor = 0
    self.CMSSW_patch = 0
    try:
        self.CMSSW_major = int(version_array[1])
        self.CMSSW_minor = int(version_array[2])
        self.CMSSW_patch = int(version_array[3])
    except (IndexError, ValueError):
        # Too few '_'-separated fields, or a field that is not a number.
        msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
        raise CrabException(msg)

    if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
        msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
        raise CrabException(msg)
    # As CMSSW versions are dropped we can drop more code:
    #   2.x dropped: drop check for lumi range setting

    ### collect Data cards

    ### Temporary: added to remove input file control in the case of PU
    self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)

    tmp = cfg_params['CMSSW.datasetpath']
    common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = " + tmp)

    if tmp == '':
        msg = "Error: datasetpath not defined "
        raise CrabException(msg)
    elif tmp.lower() == 'none':
        self.datasetPath = None
        self.selectNoInput = 1
        self.primaryDataset = 'null'
    else:
        self.datasetPath = tmp
        self.selectNoInput = 0
        pathFields = self.datasetPath.split("/")
        # A leading '/' yields an empty first field, so a valid path
        # splits into at least four fields.
        if len(pathFields) < 4:
            msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
            msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
            raise CrabException(msg)
        self.primaryDataset = pathFields[1]
        self.dataTier = pathFields[2]

    # Analysis dataset is primary/processed/tier/definition
    self.ads = False
    if self.datasetPath:
        self.ads = len(self.datasetPath.split("/")) > 4

    # FUTURE: Can remove this check
    if self.ads and self.CMSSW_major < 3:
        common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
        common.logger.info(' Only file level, not lumi level, granularity is supported.')

    self.debugWrap = ''
    self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper', 0))
    if self.debug_wrapper == 1:
        self.debugWrap = '--debug'

    ## now the application
    self.managedGenerators = ['madgraph', 'comphep', 'lhe']
    self.generator = cfg_params.get('CMSSW.generator', 'pythia').lower()
    self.executable = cfg_params.get('CMSSW.executable', 'cmsRun')
    common.logger.log(10-1, "CMSSW::CMSSW(): executable = " + self.executable)

    if not cfg_params.has_key('CMSSW.pset'):
        raise CrabException("PSet file missing. Cannot run cmsRun ")
    self.pset = cfg_params['CMSSW.pset']
    common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = " + self.pset)
    if self.pset.lower() != 'none':
        if not os.path.exists(self.pset):
            raise CrabException("User defined PSet file " + self.pset + " does not exist")
    else:
        self.pset = None

    # output files
    ## stuff which must be returned always via sandbox
    self.output_file_sandbox = []

    # add fjr report by default via sandbox
    self.output_file_sandbox.append(self.fjrFileName)

    # other output files to be returned via sandbox or copied to SE
    self.output_file = []
    tmp = cfg_params.get('CMSSW.output_file', None)
    if tmp:
        self.output_file = [x.strip() for x in tmp.split(',')]

    # script_exe file as additional file in inputSandbox
    self.scriptExe = cfg_params.get('USER.script_exe', None)
    if self.scriptExe:
        if not os.path.isfile(self.scriptExe):
            msg = "ERROR. file " + self.scriptExe + " not found"
            raise CrabException(msg)
        self.additional_inbox_files.append(self.scriptExe.strip())

    self.AdditionalArgs = cfg_params.get('USER.script_arguments', None)
    if self.AdditionalArgs:
        self.AdditionalArgs = self.AdditionalArgs.replace(',', ' ')

    # With neither a dataset nor a pset the job can only run a user
    # script.  An unset script_exe is None (the .get() default above),
    # so test for falsiness: comparing against '' never matched.
    if self.datasetPath is None and self.pset is None and not self.scriptExe:
        msg = "Error. script_exe not defined"
        raise CrabException(msg)

    # use parent files...
    self.useParent = int(self.cfg_params.get('CMSSW.use_parent', 0))

    ## additional input files
    if cfg_params.has_key('USER.additional_input_files'):
        tmpAddFiles = cfg_params['USER.additional_input_files'].split(',')
        for entry in tmpAddFiles:
            entry = entry.strip()
            if not entry:
                # e.g. a trailing comma; indexing entry[0] would fail
                continue
            dirname = ''
            if not entry[0] == "/":
                dirname = "."
            files = []
            if entry.find("*") > -1:
                files = glob.glob(os.path.join(dirname, entry))
                if len(files) == 0:
                    raise CrabException("No additional input file found with this pattern: " + entry)
            else:
                files.append(entry)
            for fname in files:
                if not os.path.exists(fname):
                    raise CrabException("Additional input file not found: " + fname)
                self.additional_inbox_files.append(fname.strip())
        common.logger.debug("Additional input files: " + str(self.additional_inbox_files))

    ## New method of dealing with seeds
    # str.strip() returns a new string; store the stripped value so
    # that "a, b" yields 'a' and 'b' rather than 'a' and ' b'.
    self.incrementSeeds = []
    self.preserveSeeds = []
    if cfg_params.has_key('CMSSW.preserve_seeds'):
        for seed in cfg_params['CMSSW.preserve_seeds'].split(','):
            self.preserveSeeds.append(seed.strip())
    if cfg_params.has_key('CMSSW.increment_seeds'):
        for seed in cfg_params['CMSSW.increment_seeds'].split(','):
            self.incrementSeeds.append(seed.strip())

    self.firstRun = cfg_params.get('CMSSW.first_run', None)

    # Copy/return/publish
    self.copy_data = int(cfg_params.get('USER.copy_data', 0))
    self.return_data = int(cfg_params.get('USER.return_data', 0))
    ### FEDE ###
    self.publish_data = int(cfg_params.get('USER.publish_data', 0))
    if self.publish_data == 1:
        if not cfg_params.has_key('USER.publish_data_name'):
            raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
        else:
            self.processedDataset = cfg_params['USER.publish_data_name']
            # NOTE: a length check on the publication name used to
            # follow here; it had been disabled by wrapping it in a
            # string literal and is therefore not reproduced.

    self.conf = {}
    self.conf['pubdata'] = None
    # number of jobs requested to be created, limit obj splitting DD
    #DBSDLS-start
    ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
    self.maxEvents = 0       # max events available ( --> check the requested nb. of evts in Creator.py)
    self.DBSPaths = {}       # all dbs paths requested ( --> input to the site local discovery script)
    self.jobDestination = [] # Site destination(s) for each job (list of lists)
    ## Perform the data location and discovery (based on DBS/DLS)
    ## SL: Don't if NONE is specified as input (pythia use case)
    blockSites = {}
    if self.datasetPath:
        blockSites = self.DataDiscoveryAndLocation(cfg_params)
    #DBSDLS-end
    self.conf['blockSites'] = blockSites

    ## Select Splitting
    splitByRun = int(cfg_params.get('CMSSW.split_by_run', 0))

    if self.selectNoInput:
        if self.pset is None:
            self.algo = 'ForScript'
        else:
            self.algo = 'NoInput'
            self.conf['managedGenerators'] = self.managedGenerators
            self.conf['generator'] = self.generator
    elif self.ads:
        self.algo = 'LumiBased'
    elif splitByRun == 1:
        self.algo = 'RunBased'
    else:
        self.algo = 'EventBased'
    common.logger.debug("Job splitting method: %s" % self.algo)

    splitter = JobSplitter(self.cfg_params, self.conf)
    self.dict = splitter.Algos()[self.algo]()

    self.argsFile = '%s/arguments.xml' % common.work_space.shareDir()
    self.rootArgsFilename = 'arguments'
    # modify Pset only the first time
    if isNew:
        if self.pset is not None:
            self.ModifyPset()

        ## Prepare inputSandbox TarBall (only the first time)
        self.tarNameWithPath = self.getTarBall(self.executable)
303 spiga 1.293
304    
305     def ModifyPset(self):
306     import PsetManipulator as pp
307 ewv 1.335
308     # If pycfg_params set, fake out the config script
309     # to make it think it was called with those args
310     pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
311     if pycfg_params:
312     trueArgv = sys.argv
313     sys.argv = [self.pset]
314     sys.argv.extend(pycfg_params.split(' '))
315 spiga 1.293 PsetEdit = pp.PsetManipulator(self.pset)
316 ewv 1.335 if pycfg_params: # Restore original sys.argv
317     sys.argv = trueArgv
318    
319 spiga 1.293 try:
320     # Add FrameworkJobReport to parameter-set, set max events.
321     # Reset later for data jobs by writeCFG which does all modifications
322 ewv 1.295 PsetEdit.maxEvent(1)
323 spiga 1.293 PsetEdit.skipEvent(0)
324     PsetEdit.psetWriter(self.configFilename())
325     ## If present, add TFileService to output files
326     if not int(self.cfg_params.get('CMSSW.skip_TFileService_output',0)):
327     tfsOutput = PsetEdit.getTFileService()
328     if tfsOutput:
329     if tfsOutput in self.output_file:
330 spiga 1.304 common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
331 spiga 1.293 else:
332     outfileflag = True #output found
333     self.output_file.append(tfsOutput)
334 spiga 1.304 common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
335 spiga 1.293 pass
336     pass
337 ewv 1.321 # If present and requested, add PoolOutputModule to output files
338 ewv 1.301 edmOutput = PsetEdit.getPoolOutputModule()
339 spiga 1.293 if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
340     if edmOutput:
341 ewv 1.321 for outputFile in edmOutput:
342     if outputFile in self.output_file:
343 ewv 1.325 common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
344 ewv 1.321 else:
345     self.output_file.append(outputFile)
346     common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
347     # not requested, check anyhow to avoid accidental T2 overload
348 slacapra 1.297 else:
349 ewv 1.321 if edmOutput:
350     missedFiles = []
351     for outputFile in edmOutput:
352     if outputFile not in self.output_file:
353     missedFiles.append(outputFile)
354     if missedFiles:
355     msg = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
356     msg += " but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
357     msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
358     if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
359     msg += " CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
360     common.logger.info(msg)
361 spiga 1.322 else :
362 ewv 1.321 raise CrabException(msg)
363 ewv 1.301
364     if (PsetEdit.getBadFilesSetting()):
365     msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
366 spiga 1.304 common.logger.info(msg)
367 ewv 1.301
368 slacapra 1.297 except CrabException, msg:
369 spiga 1.304 common.logger.info(str(msg))
370 slacapra 1.297 msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
371 spiga 1.293 raise CrabException(msg)
372    
373 gutsche 1.3
374 slacapra 1.1 def DataDiscoveryAndLocation(self, cfg_params):
375    
376 slacapra 1.86 import DataDiscovery
377     import DataLocation
378 spiga 1.304 common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
379 gutsche 1.3
380     datasetPath=self.datasetPath
381    
382 slacapra 1.1 ## Contact the DBS
383 spiga 1.304 common.logger.info("Contacting Data Discovery Services ...")
384 slacapra 1.1 try:
385 spiga 1.208 self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
386 slacapra 1.1 self.pubdata.fetchDBSInfo()
387    
388 slacapra 1.41 except DataDiscovery.NotExistingDatasetError, ex :
389 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
390     raise CrabException(msg)
391 slacapra 1.41 except DataDiscovery.NoDataTierinProvenanceError, ex :
392 slacapra 1.1 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
393     raise CrabException(msg)
394 slacapra 1.41 except DataDiscovery.DataDiscoveryError, ex:
395 gutsche 1.66 msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
396 slacapra 1.1 raise CrabException(msg)
397    
398 gutsche 1.35 self.filesbyblock=self.pubdata.getFiles()
399 slacapra 1.270 #print self.filesbyblock
400 spiga 1.269 self.conf['pubdata']=self.pubdata
401 gutsche 1.3
402 slacapra 1.1 ## get max number of events
403 ewv 1.192 self.maxEvents=self.pubdata.getMaxEvents()
404 slacapra 1.1
405     ## Contact the DLS and build a list of sites hosting the fileblocks
406     try:
407 slacapra 1.41 dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
408 gutsche 1.6 dataloc.fetchDLSInfo()
409 slacapra 1.263
410 slacapra 1.41 except DataLocation.DataLocationError , ex:
411 slacapra 1.1 msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
412     raise CrabException(msg)
413 ewv 1.131
414 slacapra 1.1
415 slacapra 1.270 unsorted_sites = dataloc.getSites()
416     sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
417     for lfn in self.filesbyblock.keys():
418     if unsorted_sites.has_key(lfn):
419     sites[lfn]=unsorted_sites[lfn]
420     else:
421     sites[lfn]=[]
422    
423 slacapra 1.264 if len(sites)==0:
424 spiga 1.267 msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
425     msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
426     msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
427 slacapra 1.264 raise CrabException(msg)
428    
429 gutsche 1.35 allSites = []
430     listSites = sites.values()
431 slacapra 1.63 for listSite in listSites:
432     for oneSite in listSite:
433 gutsche 1.35 allSites.append(oneSite)
434 slacapra 1.291 [allSites.append(it) for it in allSites if not allSites.count(it)]
435 ewv 1.295
436 gutsche 1.3
437 gutsche 1.92 # screen output
438 spiga 1.304 common.logger.info("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
439 gutsche 1.92
440 gutsche 1.35 return sites
441 ewv 1.131
442 spiga 1.42
def split(self, jobParams, firstJobID):
    """
    Record the per-job arguments and destinations computed by the
    splitter.

    The jobParams argument is ignored: it is immediately replaced by
    self.dict['args'].  Job numbers start at firstJobID + 1.  Jobs with
    non-empty arguments get an entry in the arguments XML file; every
    job is stored through common._db.updateJob_.

    Raises CrabException when there are zero jobs, or more than 500 in
    client mode on a non-local scheduler.
    """
    # Mapper used only for the per-job log message; import and build it
    # once instead of once per job.
    from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap

    jobParams = self.dict['args']
    njobs = self.dict['njobs']
    self.jobDestination = self.dict['jobDestination']

    if njobs == 0:
        raise CrabException("Asked to split zero jobs: aborting")
    if not self.server and not self.local and njobs > 500:
        raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")

    # create the empty structure
    # (appends to self.dict['args'] itself, so indexing below is safe
    # even when the splitter produced no argument rows)
    for i in range(njobs):
        jobParams.append("")

    listID = []
    listField = []
    listDictions = []
    exist = os.path.exists(self.argsFile)
    cms_se = CmsSEMap()
    concString = ' '
    for idx in range(njobs):
        jobNumber = idx + int(firstJobID) + 1
        listID.append(jobNumber)
        params = jobParams[idx]
        str_argu = str(jobNumber)
        if len(params):
            argu = {'JobID': jobNumber}
            for i in range(len(params)):
                argu[self.dict['params'][i]] = params[i]
                if len(params) == 1:
                    self.NumEvents = params[i]
            # just for debug
            str_argu += concString.join(params)
            listDictions.append(argu)
        job_ToSave = {}
        job_ToSave['arguments'] = str(jobNumber)
        job_ToSave['dlsDestination'] = self.jobDestination[idx]
        listField.append(job_ToSave)
        msg = "Job %s Arguments: %s\n"%(str(jobNumber), str_argu)
        msg += "\t Destination: %s "%(str(self.jobDestination[idx]))
        SEDestination = [cms_se[dest] for dest in self.jobDestination[idx]]
        msg += "\t CMSDestination: %s "%(str(SEDestination))
        common.logger.log(10-1, msg)
    # write xml
    if len(listDictions):
        if not exist:
            self.CreateXML()
        self.addEntry(listDictions)
    common._db.updateJob_(listID, listField)
    return
494 ewv 1.313
495 spiga 1.320 # def zipXMLfile(self):
496 ewv 1.313
497 spiga 1.320 # import tarfile
498     # try:
499     # tar = tarfile.open(self.tarNameWithPath, "a")
500     # tar.add(self.argsFile, os.path.basename(self.argsFile))
501     # tar.close()
502     # except IOError, exc:
503     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
504     # msg += str(exc)
505     # raise CrabException(msg)
506     # except tarfile.TarError, exc:
507     # msg = 'Could not add %s to %s \n'%(self.argsFile,self.tarNameWithPath)
508     # msg += str(exc)
509     # raise CrabException(msg)
510 ewv 1.325
def CreateXML(self):
    """
    Write a new arguments file at self.argsFile containing only the
    root node named self.rootArgsFilename.
    """
    result = IMProvNode(self.rootArgsFilename)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(result))
    finally:
        # close explicitly so the content is flushed before addEntry
        # reads the file back
        outfile.close()
    return
517 spiga 1.293
def addEntry(self, listDictions):
    """
    _addEntry_

    Add one 'Job' node per dictionary in listDictions to the xml file
    at self.argsFile; each dictionary's items are passed to the node
    as keyword arguments.  The file is loaded, extended and rewritten.
    """
    from IMProv.IMProvLoader import loadIMProvFile
    ## load xml
    improvDoc = loadIMProvFile(self.argsFile)
    entrname = 'Job'
    for dictions in listDictions:
        report = IMProvNode(entrname, None, **dictions)
        improvDoc.addNode(report)
    outfile = open(self.argsFile, 'w')
    try:
        outfile.write(str(improvDoc))
    finally:
        # do not leave the handle to the garbage collector
        outfile.close()
    return
533 ewv 1.131
def numberOfJobs(self):
    """
    Return the 'njobs' entry of the splitter result held in self.dict.
    """
    splitResult = self.dict
    return splitResult['njobs']
536 gutsche 1.3
def getTarBall(self, exe):
    """
    Return the path of the input sandbox tarball.

    The path is rebuilt from the work space and stored in
    self.tgzNameWithPath.  If no file exists there yet, buildTar_ is
    asked to prepare it.
    """
    tarball = common.work_space.pathForTgz() + self.tgz_name
    self.tgzNameWithPath = tarball
    if not os.path.exists(tarball):
        # Prepare a tar gzipped file with user binaries.
        self.buildTar_(exe)
        return self.tgzNameWithPath.strip()
    return tarball
549 slacapra 1.1
550     def buildTar_(self, executable):
551    
552     # First of all declare the user Scram area
553     swArea = self.scram.getSWArea_()
554     swReleaseTop = self.scram.getReleaseTop_()
555 ewv 1.131
556 slacapra 1.1 ## check if working area is release top
557     if swReleaseTop == '' or swArea == swReleaseTop:
558 spiga 1.304 common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
559 slacapra 1.1 return
560    
561 slacapra 1.61 import tarfile
562     try: # create tar ball
563 spiga 1.320 tar = tarfile.open(self.tgzNameWithPath, "w:gz")
564 slacapra 1.61 ## First find the executable
565 slacapra 1.86 if (self.executable != ''):
566 slacapra 1.61 exeWithPath = self.scram.findFile_(executable)
567     if ( not exeWithPath ):
568     raise CrabException('User executable '+executable+' not found')
569 ewv 1.131
570 slacapra 1.61 ## then check if it's private or not
571     if exeWithPath.find(swReleaseTop) == -1:
572     # the exe is private, so we must ship
573 spiga 1.304 common.logger.debug("Exe "+exeWithPath+" to be tarred")
574 slacapra 1.61 path = swArea+'/'
575 corvo 1.85 # distinguish case when script is in user project area or given by full path somewhere else
576     if exeWithPath.find(path) >= 0 :
577     exe = string.replace(exeWithPath, path,'')
578 slacapra 1.129 tar.add(path+exe,exe)
579 corvo 1.85 else :
580     tar.add(exeWithPath,os.path.basename(executable))
581 slacapra 1.61 pass
582     else:
583     # the exe is from release, we'll find it on WN
584     pass
585 ewv 1.131
586 slacapra 1.61 ## Now get the libraries: only those in local working area
587 slacapra 1.256 tar.dereference=True
588 slacapra 1.61 libDir = 'lib'
589     lib = swArea+'/' +libDir
590 spiga 1.304 common.logger.debug("lib "+lib+" to be tarred")
591 slacapra 1.61 if os.path.exists(lib):
592     tar.add(lib,libDir)
593 ewv 1.131
594 slacapra 1.61 ## Now check if module dir is present
595     moduleDir = 'module'
596     module = swArea + '/' + moduleDir
597     if os.path.isdir(module):
598     tar.add(module,moduleDir)
599 slacapra 1.256 tar.dereference=False
600 slacapra 1.61
601     ## Now check if any data dir(s) is present
602 spiga 1.179 self.dataExist = False
603 slacapra 1.212 todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
604 slacapra 1.206 while len(todo_list):
605     entry, name = todo_list.pop()
606 slacapra 1.211 if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
607 slacapra 1.206 continue
608 slacapra 1.212 if os.path.isdir(swArea+"/src/"+entry):
609 slacapra 1.206 entryPath = entry + '/'
610 slacapra 1.212 todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
611 slacapra 1.206 if name == 'data':
612     self.dataExist=True
613 spiga 1.304 common.logger.debug("data "+entry+" to be tarred")
614 slacapra 1.212 tar.add(swArea+"/src/"+entry,"src/"+entry)
615 slacapra 1.206 pass
616     pass
617 ewv 1.182
618 spiga 1.179 ### CMSSW ParameterSet
619     if not self.pset is None:
620     cfg_file = common.work_space.jobDir()+self.configFilename()
621 ewv 1.182 tar.add(cfg_file,self.configFilename())
622 ewv 1.313
623 spiga 1.309 try:
624     crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
625     tar.add(crab_cfg_file,'crab.cfg')
626     except:
627     pass
628 fanzago 1.93
629 fanzago 1.152 ## Add ProdCommon dir to tar
630 slacapra 1.211 prodcommonDir = './'
631     prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
632 spiga 1.244 neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
633 spiga 1.298 'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
634     'WMCore/__init__.py','WMCore/Algorithms']
635 slacapra 1.214 for file in neededStuff:
636     tar.add(prodcommonPath+file,prodcommonDir+file)
637 spiga 1.179
638     ##### ML stuff
639     ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
640     path=os.environ['CRABDIR'] + '/python/'
641     for file in ML_file_list:
642     tar.add(path+file,file)
643    
644     ##### Utils
645 spiga 1.238 Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
646 spiga 1.179 for file in Utils_file_list:
647     tar.add(path+file,file)
648 ewv 1.131
649 ewv 1.182 ##### AdditionalFiles
650 slacapra 1.253 tar.dereference=True
651 spiga 1.179 for file in self.additional_inbox_files:
652     tar.add(file,string.split(file,'/')[-1])
653 slacapra 1.253 tar.dereference=False
654 spiga 1.320 common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
655 ewv 1.182
656 slacapra 1.61 tar.close()
657 mcinquil 1.241 except IOError, exc:
658 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
659 spiga 1.304 msg += str(exc)
660     raise CrabException(msg)
661 mcinquil 1.241 except tarfile.TarError, exc:
662 spiga 1.320 msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
663 spiga 1.304 msg += str(exc)
664     raise CrabException(msg)
665 spiga 1.300
666 gutsche 1.72 tarballinfo = os.stat(self.tgzNameWithPath)
667     if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
668 spiga 1.238 msg = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) \
669 ewv 1.250 +'MB input sandbox limit \n'
670 spiga 1.238 msg += ' and not supported by the direct GRID submission system.\n'
671     msg += ' Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
672 mcinquil 1.336 msg += ' For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/CrabServerForUsers#Server_available_for_users'
673 spiga 1.238 raise CrabException(msg)
674 gutsche 1.72
675 slacapra 1.61 ## create tar-ball with ML stuff
676 slacapra 1.97
def wsSetupEnvironment(self, nj=0):
    """
    Build the piece of the job wrapper script that prepares the
    execution environment for job number 'nj'.

    The returned text sets up the CMS environment according to the
    value of $middleware, creates the CMSSW project area through scram,
    checks the number of wrapper arguments and finally defines the
    dataset- and pset-related shell variables.
    """
    wrapper_pset = 'pset.py'
    pieces = []

    # JobType-independent part: one branch per supported middleware
    pieces.append('\n#Written by cms_cmssw::wsSetupEnvironment\n')
    pieces.append('echo ">>> setup environment"\n')
    pieces.append('if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n')
    pieces.append(self.wsSetupCMSLCGEnvironment_())
    pieces.append('elif [ $middleware == OSG ]; then\n')
    pieces.extend([
        ' WORKING_DIR=`/bin/mktemp -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n',
        ' if [ ! $? == 0 ] ;then\n',
        ' echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n',
        ' job_exit_code=10016\n',
        ' func_exit\n',
        ' fi\n',
        ' echo ">>> Created working directory: $WORKING_DIR"\n',
        '\n',
        ' echo "Change to working directory: $WORKING_DIR"\n',
        ' cd $WORKING_DIR\n',
        ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n',
    ])
    pieces.append(self.wsSetupCMSOSGEnvironment_())
    # SGE and ARC reuse the LCG environment setup
    pieces.append('elif [ $middleware == SGE ]; then\n')
    pieces.append(self.wsSetupCMSLCGEnvironment_())
    pieces.append('elif [ $middleware == ARC ]; then\n')
    pieces.append(self.wsSetupCMSLCGEnvironment_())
    pieces.append('fi\n')

    # JobType-specific part: create the CMSSW area and load its runtime
    scram_cmd = self.scram.commandName()
    release = self.version
    pieces.extend([
        '\n\n',
        'echo ">>> specific cmssw setup environment:"\n',
        'echo "CMSSW_VERSION = ' + release + '"\n',
        scram_cmd + ' project CMSSW ' + release + '\n',
        'status=$?\n',
        'if [ $status != 0 ] ; then\n',
        ' echo "ERROR ==> CMSSW ' + release + ' not found on `hostname`" \n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
        'cd ' + release + '\n',
        'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n',
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'eval `' + scram_cmd + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n',
        'if [ $? != 0 ] ; then\n',
        ' echo "ERROR ==> Problem with the command: "\n',
        ' echo "eval \\`' + scram_cmd + ' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \\` at `hostname`"\n',
        ' job_exit_code=10034\n',
        ' func_exit\n',
        'fi \n',
    ])

    # Argument handling: the wrapper needs at least self.argsList arguments
    pieces.extend([
        "\n",
        "## number of arguments (first argument always jobnumber)\n",
        "\n",
        "if [ $nargs -lt " + str(self.argsList) + " ]\n",
        "then\n",
        " echo 'ERROR ==> Too few arguments' +$nargs+ \n",
        ' job_exit_code=50113\n',
        " func_exit\n",
        "fi\n",
        "\n",
    ])

    # Job-specific part
    job = common.job_list[nj]
    if self.datasetPath:
        pieces.extend([
            '\n',
            'DatasetPath=' + self.datasetPath + '\n',
            'PrimaryDataset=' + self.primaryDataset + '\n',
            'DataTier=' + self.dataTier + '\n',
            'ApplicationFamily=cmsRun\n',
        ])
    else:
        pieces.extend([
            'DatasetPath=MCDataTier\n',
            'PrimaryDataset=null\n',
            'DataTier=null\n',
            'ApplicationFamily=MCDataTier\n',
        ])

    if self.pset is not None:
        # copy the job configuration into place and rename it to the
        # fixed name used by the wrapper
        job_pset = os.path.basename(job.configFilename())
        pieces.extend([
            '\n',
            'cp $RUNTIME_AREA/' + job_pset + ' .\n',
            'PreserveSeeds=' + ','.join(self.preserveSeeds) + '; export PreserveSeeds\n',
            'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n',
            'echo "PreserveSeeds: <$PreserveSeeds>"\n',
            'echo "IncrementSeeds:<$IncrementSeeds>"\n',
            'mv -f ' + job_pset + ' ' + wrapper_pset + '\n',
        ])
    else:
        # no pset: only export the optional extra arguments / event limit
        pieces.append('\n')
        if self.AdditionalArgs:
            pieces.append('export AdditionalArgs="%s"\n' % (self.AdditionalArgs))
        if int(self.NumEvents) != 0:
            pieces.append('export MaxEvents=%s\n' % str(self.NumEvents))

    return ''.join(pieces)
779 slacapra 1.176
def wsUntarSoftware(self, nj=0):
    """
    Return the wrapper script fragment that unpacks the user tarball
    in the current directory and puts $RUNTIME_AREA in PYTHONPATH.

    If the tarball self.tgzNameWithPath does not exist on the
    submission side, only the header comment is returned.
    'nj' is accepted for interface uniformity and is not used.
    """
    txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'

    if not os.path.isfile(self.tgzNameWithPath):
        return txt

    tarball = os.path.basename(self.tgzNameWithPath)
    txt += 'echo ">>> tar xzvf $RUNTIME_AREA/' + tarball + ' :" \n'
    txt += 'tar zxvf $RUNTIME_AREA/' + tarball + '\n'
    # $? must be read immediately after tar: any command in between
    # (such as the debug listing below) would overwrite it and hide an
    # untar failure.
    txt += 'untar_status=$? \n'
    if self.debug_wrapper == 1:
        txt += 'ls -Al \n'
    txt += 'if [ $untar_status -ne 0 ]; then \n'
    txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
    txt += ' job_exit_code=$untar_status\n'
    txt += ' func_exit\n'
    txt += 'else \n'
    txt += ' echo "Successful untar" \n'
    txt += 'fi \n'
    txt += '\n'
    txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
    txt += 'if [ -z "$PYTHONPATH" ]; then\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
    txt += 'else\n'
    txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
    txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
    txt += 'fi\n'
    txt += '\n'

    return txt
814 ewv 1.170
def wsBuildExe(self, nj=0):
    """
    Return the wrapper script fragment that moves the unpacked user
    software (lib/, module/, optionally src/ and the additional input
    files) from $RUNTIME_AREA into the CMSSW area, extends PYTHONPATH
    and, when a pset is defined, computes PSETHASH with edmConfigHash.
    """
    pset_file = 'pset.py'
    script = [
        '\n#Written by cms_cmssw::wsBuildExe\n',
        'echo ">>> moving CMSSW software directories in `pwd`" \n',
        'rm -r lib/ module/ \n',
        'mv $RUNTIME_AREA/lib/ . \n',
        'mv $RUNTIME_AREA/module/ . \n',
    ]

    if self.dataExist == True:
        script.append('rm -r src/ \n')
        script.append('mv $RUNTIME_AREA/src/ . \n')

    # extra user files travel next to the tarball content
    for extra in self.additional_inbox_files:
        script.append('mv $RUNTIME_AREA/' + os.path.basename(extra) + ' . \n')

    script.extend([
        'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n',
        'if [ -z "$PYTHONPATH" ]; then\n',
        ' export PYTHONPATH=$RUNTIME_AREA/\n',
        'else\n',
        ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n',
        'echo "PYTHONPATH=$PYTHONPATH"\n',
        'fi\n',
        '\n',
    ])

    if self.pset is None:
        return ''.join(script)

    script.append('\n')
    if self.debug_wrapper == 1:
        # dump the configuration and locate edmConfigHash for debugging
        script.extend([
            'echo "***** cat ' + pset_file + ' *********"\n',
            'cat ' + pset_file + '\n',
            'echo "****** end ' + pset_file + ' ********"\n',
            '\n',
            'echo "***********************" \n',
            'which edmConfigHash \n',
            'echo "***********************" \n',
        ])
    script.extend([
        'edmConfigHash ' + pset_file + ' \n',
        'PSETHASH=`edmConfigHash ' + pset_file + '` \n',
        'echo "PSETHASH = $PSETHASH" \n',
        # temporary fix for noEdm files: fall back to a literal "null"
        'if [ -z "$PSETHASH" ]; then \n',
        ' export PSETHASH=null\n',
        'fi \n',
        '\n',
    ])
    return ''.join(script)
867 slacapra 1.1
868 ewv 1.131
def executableName(self):
    """
    Return the command used to start the job: the shell ("sh ") when a
    user script (scriptExe) is configured, self.executable otherwise.
    """
    if not self.scriptExe:
        return self.executable
    return "sh "
874 slacapra 1.1
def executableArgs(self):
    """
    Return the argument string passed to the executable: the user
    script followed by job number and additional arguments when
    scriptExe is configured, otherwise the job-report and pset options.
    """
    if not self.scriptExe:
        return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
    return self.scriptExe + " $NJob $AdditionalArgs"
880 slacapra 1.1
def inputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL input sandbox:
    the tarball and the arguments file (each only if it exists on
    disk) followed by the job script located in the job directory.
    """
    sandbox = []
    for candidate in (self.tgzNameWithPath, self.argsFile):
        if os.path.isfile(candidate):
            sandbox.append(candidate)
    sandbox.append(common.work_space.jobDir() + self.scriptName)
    return sandbox
892    
def outputSandbox(self, nj):
    """
    Return the list of file names to put in the JDL output sandbox.

    Every user-declared output file (self.output_file followed by
    self.output_file_sandbox) is passed through numberFile with the
    job number nj + 1.
    """
    declared = self.output_file + self.output_file_sandbox
    return [numberFile(name, str(nj + 1)) for name in declared]
904    
905    
def wsRenameOutput(self, nj):
    """
    Return the wrapper script fragment that renames the produced
    output files with the job number, links them under the original
    name in $RUNTIME_AREA, defines the 'file_list' shell variable and
    finally changes directory to $RUNTIME_AREA.
    """
    # listing block emitted both before and after the renaming
    listing = [
        'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n',
        'echo ">>> current directory content:"\n',
    ]
    if self.debug_wrapper == 1:
        listing.append('ls -Al\n')
    listing.append('\n')

    script = ['\n#Written by cms_cmssw::wsRenameOutput\n']
    script.extend(listing)

    for produced in self.output_file:
        numbered = numberFile(produced, '$NJob')
        script.append('\n')
        script.append('# check output file\n')
        script.append('if [ -e ./' + produced + ' ] ; then\n')
        if self.copy_data == 1:
            # For OSG nodes, file is in $WORKING_DIR, should not be
            # moved to $RUNTIME_AREA
            script.append(' mv ' + produced + ' ' + numbered + '\n')
            script.append(' ln -s `pwd`/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        else:
            script.append(' mv ' + produced + ' $RUNTIME_AREA/' + numbered + '\n')
            script.append(' ln -s $RUNTIME_AREA/' + numbered + ' $RUNTIME_AREA/' + produced + '\n')
        script.append('else\n')
        script.append(' job_exit_code=60302\n')
        script.append(' echo "WARNING: Output file ' + produced + ' not found"\n')
        if common.scheduler.name().upper() == 'CONDOR_G':
            script.extend([
                ' if [ $middleware == OSG ]; then \n',
                ' echo "prepare dummy output file"\n',
                ' echo "Processing of job output failed" > $RUNTIME_AREA/' + numbered + '\n',
                ' fi \n',
            ])
        script.append('fi\n')

    numbered_paths = [numberFile('$SOFTWARE_DIR/' + produced, '$NJob')
                      for produced in self.output_file]
    script.append('file_list="' + ','.join(numbered_paths) + '"\n')
    script.append('\n')
    script.extend(listing)
    script.append('cd $RUNTIME_AREA\n')
    script.append('echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n')
    return ''.join(script)
952    
def getRequirements(self, nj=None):
    """
    Return the job requirements expression to add to the JDL files.

    The expression is the ' && ' conjunction of:
      - a Member() test on the CMSSW version tag (if self.version is set),
      - a Member() test on the architecture tag (if self.executable_arch
        is set),
      - the outbound connectivity requirement,
      - for the glite / glitecoll schedulers, a test on the CE status
        (also accepting "Special" when GRID.use_cream is configured).

    'nj' is not used; it is kept for interface compatibility.
    """
    runtime_env = 'other.GlueHostApplicationSoftwareRunTimeEnvironment'

    # Collect the clauses first and join them at the end, so that the
    # expression never starts with a dangling ' && ' when the version
    # (or the architecture) is not defined.
    clauses = []
    if self.version:
        clauses.append('Member("VO-cms-' + self.version + '", ' + runtime_env + ')')
    if self.executable_arch:
        clauses.append('Member("VO-cms-' + self.executable_arch + '", ' + runtime_env + ')')

    clauses.append('(other.GlueHostNetworkAdapterOutboundIP)')

    if common.scheduler.name() in ('glitecoll', 'glite'):
        ## 25-Jun-2009 SL: patch to use Cream enabled WMS
        if self.cfg_params.get('GRID.use_cream', None):
            clauses.append('(other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")')
        else:
            # trailing blank kept: it was part of the original expression
            clauses.append('other.GlueCEStateStatus == "Production" ')

    return ' && '.join(clauses)
976 gutsche 1.3
def configFilename(self):
    """ Return the config file name: the job type name plus '.py'. """
    base = self.name()
    return base + '.py'
980 gutsche 1.3
def wsSetupCMSOSGEnvironment_(self):
    """
    Return the wrapper script fragment, common to all CMS jobs, that
    prepares the execution environment on OSG: it exports SCRAM_ARCH
    and sources $OSG_APP/cmssoft/cms/cmsset_default.sh for the
    configured CMSSW version, failing with exit code 10020 when that
    file is missing.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n',
        ' echo ">>> setup CMS OSG environment:"\n',
        ' echo "set SCRAM ARCH to ' + arch + '"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
        ' if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cms/cmsset_default.sh ' + self.version + '\n',
        ' else\n',
        ' echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        '\n',
        ' echo "==> setup cms environment ok"\n',
        ' echo "SCRAM_ARCH = $SCRAM_ARCH"\n',
    ]
    return ''.join(lines)
1004 ewv 1.131
def wsSetupCMSLCGEnvironment_(self):
    """
    Return the wrapper script fragment, common to all CMS jobs, that
    prepares the execution environment on LCG: it exports SCRAM_ARCH
    and BUILD_ARCH and sources $VO_CMS_SW_DIR/cmsset_default.sh,
    setting a dedicated job exit code for each failure.
    """
    arch = self.executable_arch
    lines = [
        '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n',
        ' echo ">>> setup CMS LCG environment:"\n',
        ' echo "set SCRAM ARCH and BUILD_ARCH to ' + arch + ' ###"\n',
        ' export SCRAM_ARCH=' + arch + '\n',
        ' export BUILD_ARCH=' + arch + '\n',
        # software area not defined on the worker node
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "ERROR ==> CMS software dir not found on WN `hostname`"\n',
        ' job_exit_code=10031\n',
        ' func_exit\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        # setup script missing or empty
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' job_exit_code=10020\n',
        ' func_exit\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        # setup script failed
        ' if [ $result -ne 0 ]; then\n',
        ' echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' job_exit_code=10032\n',
        ' func_exit\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        ' echo "==> setup cms environment ok"\n',
    ]
    return ''.join(lines)
1038 gutsche 1.5
def wsModifyReport(self, nj):
    """
    Return the wrapper script fragment that modifies the
    FrameworkJobReport by running ModifyJobReport.py.

    Nothing is produced (empty string) unless self.copy_data is 1.
    When self.publish_data is 1 the processed dataset is also defined
    in the script and passed to ModifyJobReport.py.
    """
    if self.copy_data != 1:
        return ''

    modifier = '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py'
    script = [
        '\n#Written by cms_cmssw::wsModifyReport\n',
        'if [ $StageOutExitStatus -eq 0 ]; then\n',
        ' FOR_LFN=$LFNBaseName\n',
        'else\n',
        ' FOR_LFN=/copy_problems/ \n',
        'fi\n',
        'echo ">>> Modify Job Report:" \n',
        'chmod a+x ' + modifier + '\n',
        'echo "SE = $SE"\n',
        # SE_PATH is taken from the stage-out endpoint
        'echo "endpoint = $endpoint"\n',
        'SE_PATH=$endpoint\n',
        'echo "SE_PATH = $endpoint"\n',
        'echo "FOR_LFN = $FOR_LFN" \n',
        'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n',
    ]

    report_args = ('fjr $RUNTIME_AREA/crab_fjr_$NJob.xml n_job $NJob for_lfn $FOR_LFN'
                   ' PrimaryDataset $PrimaryDataset ApplicationFamily $ApplicationFamily'
                   ' ApplicationName $executable cmssw_version $CMSSW_VERSION'
                   ' psethash $PSETHASH se_name $SE se_path $SE_PATH file_list $file_list')
    if self.publish_data == 1:
        script.append('ProcessedDataset=' + self.processedDataset + '\n')
        script.append('echo "ProcessedDataset = $ProcessedDataset"\n')
        report_args = report_args + ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'

    script.extend([
        'echo "' + modifier + ' ' + str(report_args) + '"\n',
        modifier + ' ' + str(report_args) + '\n',
        'modifyReport_result=$?\n',
        'if [ $modifyReport_result -ne 0 ]; then\n',
        ' modifyReport_result=70500\n',
        ' job_exit_code=$modifyReport_result\n',
        ' echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "WARNING: Problem with ModifyJobReport"\n',
        'else\n',
        ' mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n',
        'fi\n',
    ])
    return ''.join(script)
1084 ewv 1.283
def wsParseFJR(self):
    """
    Return the wrapper script fragment that parses the
    FrameworkJobReport with parseCrabFjr.py to extract the executable
    exit status, reports it, and stops the job when the status is not
    one of the tolerated values (0, 50115, 50117, 30001).
    """
    fjr = '$RUNTIME_AREA/crab_fjr_$NJob.xml'
    parser = 'python $RUNTIME_AREA/parseCrabFjr.py --input ' + fjr

    script = [
        '\n#Written by cms_cmssw::wsParseFJR\n',
        'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n',
        'if [ -s ' + fjr + ' ]; then\n',
        ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n',
        ' cmd_out=`' + parser + ' --dashboard $MonitorID,$MonitorJobID ' + self.debugWrap + '`\n',
    ]
    if self.debug_wrapper == 1:
        script.append(' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n')
    script.extend([
        ' executable_exit_status=`' + parser + ' --exitcode`\n',
        ' if [ $executable_exit_status -eq 50115 ];then\n',
        ' echo ">>> crab_fjr.xml contents: "\n',
        ' cat ' + fjr + '\n',
        ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n',
        ' elif [ $executable_exit_status -eq -999 ];then\n',
        ' echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n',
        ' else\n',
        ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n',
        ' fi\n',
        ' else\n',
        ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        ' fi\n',
        #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
        ' if [ $executable_exit_status -eq 0 ];then\n',
        ' echo ">>> Executable succeded $executable_exit_status"\n',
        # NOTE: the verification of the list of processed input files
        # used to be emitted here; it no longer works with the current
        # handling of the job arguments and is not generated.
        ' fi\n',
        'else\n',
        ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n',
        'fi\n',
        '\n',
        'if [ $executable_exit_status -ne 0 ] && [ $executable_exit_status -ne 50115 ] && [ $executable_exit_status -ne 50117 ] && [ $executable_exit_status -ne 30001 ];then\n',
        ' echo ">>> Executable failed $executable_exit_status"\n',
        ' echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        ' echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        ' job_exit_code=$executable_exit_status\n',
        ' func_exit\n',
        'fi\n\n',
        'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n',
        'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n',
        'job_exit_code=$executable_exit_status\n',
    ])
    return ''.join(script)
1157    
def setParam_(self, param, value):
    """ Store 'value' under the key 'param' in the parameter dictionary. """
    self._params.update({param: value})
1160    
def getParams(self):
    """ Return the parameter dictionary itself (not a copy). """
    params = self._params
    return params
1163 gutsche 1.8
def outList(self,list=False):
    """
    Return the wrapper script fragment that declares the files
    expected in the output sandbox (shell variable 'filesToCheck').

    The expected files are the numbered sandbox output files, plus the
    numbered user output files when self.return_data is 1, followed by
    the job stdout and stderr. A warning is logged when no output file
    has been defined.

    If 'list' is true, self.output_file is returned instead of the
    script fragment.
    """
    job_stdout = 'CMSSW_$NJob.stdout'
    job_stderr = 'CMSSW_$NJob.stderr'

    if len(self.output_file) < 1:
        warning = "WARNING: no output files name have been defined!!\n"
        warning += "\tno output files will be reported back/staged\n"
        common.logger.info(warning)

    # user output files come back through the sandbox only on request
    if self.return_data == 1:
        wanted = self.output_file + self.output_file_sandbox
    else:
        wanted = self.output_file_sandbox

    expected = [numberFile(name, '$NJob') for name in wanted]
    expected.extend([job_stdout, job_stderr])
    joined = ' '.join(expected)

    if list:
        return self.output_file

    return ''.join([
        'echo ">>> list of expected files on output sandbox"\n',
        'echo "output files: ' + joined + '"\n',
        'filesToCheck="' + joined + '"\n',
        'export filesToCheck\n',
    ])