[ViewVC] Diff of: cvsroot/COMP/CRAB/python/cms

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.183 by spiga, Wed Apr 30 18:21:07 2008 UTC vs.
Revision 1.221 by fanzago, Wed Jun 18 14:02:42 2008 UTC

#	Line 10 \| Line 10 \| from LFNBaseName import *
10		import os, string, glob
11
12		class Cmssw(JobType):
13	<	def __init__(self, cfg_params, ncjobs):
13	>	def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
14		JobType.__init__(self, 'CMSSW')
15		common.logger.debug(3,'CMSSW::__init__')
16	<
16	>	self.skip_blocks = skip_blocks
17	>
18		self.argsList = []
19
20		self._params = {}
#	Line 35 \| Line 36 \| class Cmssw(JobType):
36		self.executable_arch = self.scram.getArch()
37		self.tgz_name = 'default.tgz'
38		self.scriptName = 'CMSSW.sh'
39	<	self.pset = '' #scrip use case Da
40	<	self.datasetPath = '' #scrip use case Da
39	>	self.pset = ''
40	>	self.datasetPath = ''
41
42		# set FJR file name
43		self.fjrFileName = 'crab_fjr.xml'
44
45		self.version = self.scram.getSWVersion()
46		version_array = self.version.split('_')
47	<	self.major_version = 0
48	<	self.minor_version = 0
47	>	self.CMSSW_major = 0
48	>	self.CMSSW_minor = 0
49	>	self.CMSSW_patch = 0
50		try:
51	<	self.major_version = int(version_array[1])
52	<	self.minor_version = int(version_array[2])
51	>	self.CMSSW_major = int(version_array[1])
52	>	self.CMSSW_minor = int(version_array[2])
53	>	self.CMSSW_patch = int(version_array[3])
54		except:
55	<	msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
55	>	msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
56		raise CrabException(msg)
57
55	–
56	–	#
57	–	# Try to block creation in case of arch/version mismatch
58	–	#
59	–
60	–	# a = string.split(self.version, "_")
61	–	#
62	–	# if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
63	–	# msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
64	–	# common.logger.message(msg)
65	–	# if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
66	–	# msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
67	–	# raise CrabException(msg)
68	–	#
69	–
70	–
58		### collect Data cards
59
60		if not cfg_params.has_key('CMSSW.datasetpath'):
61		msg = "Error: datasetpath not defined "
62		raise CrabException(msg)
63	+
64	+	### Temporary: added to remove input file control in the case of PU
65	+	if not cfg_params.has_key('USER.dataset_pu'):
66	+	self.dataset_pu = 'NONE'
67	+	else:
68	+	self.dataset_pu = cfg_params['USER.dataset_pu']
69	+	####
70	+
71		tmp = cfg_params['CMSSW.datasetpath']
72		log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
73		if string.lower(tmp)=='none':
#	Line 83 \| Line 78 \| class Cmssw(JobType):
78		self.selectNoInput = 0
79
80		self.dataTiers = []
81	<
81	>	self.debugWrap = ''
82	>	self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
83	>	if self.debug_wrapper: self.debugWrap='--debug'
84		## now the application
85		self.executable = cfg_params.get('CMSSW.executable','cmsRun')
86		log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
#	Line 106 \| Line 103 \| class Cmssw(JobType):
103		self.output_file_sandbox.append(self.fjrFileName)
104
105		# other output files to be returned via sandbox or copied to SE
106	+	outfileflag = False
107		self.output_file = []
108		tmp = cfg_params.get('CMSSW.output_file',None)
109		if tmp :
110	<	tmpOutFiles = string.split(tmp,',')
111	<	log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
112	<	for tmp in tmpOutFiles:
113	<	tmp=string.strip(tmp)
116	<	self.output_file.append(tmp)
117	<	pass
118	<	else:
119	<	log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
120	<	pass
110	>	self.output_file = [x.strip() for x in tmp.split(',')]
111	>	outfileflag = True #output found
112	>	#else:
113	>	# log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
114
115		# script_exe file as additional file in inputSandbox
116		self.scriptExe = cfg_params.get('USER.script_exe',None)
#	Line 127 \| Line 120 \| class Cmssw(JobType):
120		raise CrabException(msg)
121		self.additional_inbox_files.append(string.strip(self.scriptExe))
122
130	–	#CarlosDaniele
123		if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
124		msg ="Error. script_exe not defined"
125		raise CrabException(msg)
126
127	+	# use parent files...
128	+	self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
129	+
130		## additional input files
131		if cfg_params.has_key('USER.additional_input_files'):
132		tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
#	Line 150 \| Line 145 \| class Cmssw(JobType):
145		if not os.path.exists(file):
146		raise CrabException("Additional input file not found: "+file)
147		pass
153	–	# fname = string.split(file, '/')[-1]
154	–	# storedFile = common.work_space.pathForTgz()+'share/'+fname
155	–	# shutil.copyfile(file, storedFile)
148		self.additional_inbox_files.append(string.strip(file))
149		pass
150		pass
#	Line 178 \| Line 170 \| class Cmssw(JobType):
170		if cfg_params.has_key('CMSSW.total_number_of_events'):
171		self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
172		self.selectTotalNumberEvents = 1
173	+	if self.selectNumberOfJobs == 1:
174	+	if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
175	+	msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
176	+	raise CrabException(msg)
177		else:
178		self.total_number_of_events = 0
179		self.selectTotalNumberEvents = 0
180
181	<	if self.pset != None: #CarlosDaniele
181	>	if self.pset != None:
182		if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
183		msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
184		raise CrabException(msg)
#	Line 212 \| Line 208 \| class Cmssw(JobType):
208		if self.sourceSeed:
209		print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
210		self.incrementSeeds.append('sourceSeed')
211	+	self.incrementSeeds.append('theSource')
212
213		self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
214		if self.sourceSeedVtx:
#	Line 230 \| Line 227 \| class Cmssw(JobType):
227
228		self.firstRun = cfg_params.get('CMSSW.first_run',None)
229
233	–	if self.pset != None: #CarlosDaniele
234	–	import PsetManipulator as pp
235	–	PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
230
231		# Copy/return
238	–
232		self.copy_data = int(cfg_params.get('USER.copy_data',0))
233		self.return_data = int(cfg_params.get('USER.return_data',0))
234
#	Line 251 \| Line 244 \| class Cmssw(JobType):
244		blockSites = self.DataDiscoveryAndLocation(cfg_params)
245		#DBSDLS-end
246
254	–
247		## Select Splitting
248		if self.selectNoInput:
249	<	if self.pset == None: #CarlosDaniele
249	>	if self.pset == None:
250		self.jobSplittingForScript()
251		else:
252		self.jobSplittingNoInput()
253		else:
254		self.jobSplittingByBlocks(blockSites)
255
256	<	# modify Pset
257	<	if self.pset != None: #CarlosDaniele
258	<	try:
259	<	# Add FrameworkJobReport to parameter-set, set max events.
260	<	# Reset later for data jobs by writeCFG which does all modifications
261	<	PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
262	<	PsetEdit.maxEvent(self.eventsPerJob)
263	<	PsetEdit.psetWriter(self.configFilename())
264	<	except:
265	<	msg='Error while manipuliating ParameterSet: exiting...'
266	<	raise CrabException(msg)
267	<	self.tgzNameWithPath = self.getTarBall(self.executable)
256	>	# modify Pset only the first time
257	>	if isNew:
258	>	if self.pset != None:
259	>	import PsetManipulator as pp
260	>	PsetEdit = pp.PsetManipulator(self.pset)
261	>	try:
262	>	# Add FrameworkJobReport to parameter-set, set max events.
263	>	# Reset later for data jobs by writeCFG which does all modifications
264	>	PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
265	>	PsetEdit.maxEvent(self.eventsPerJob)
266	>	PsetEdit.psetWriter(self.configFilename())
267	>	## If present, add TFileService to output files
268	>	if not int(cfg_params.get('CMSSW.skip_TFileService_output',0)):
269	>	tfsOutput = PsetEdit.getTFileService()
270	>	if tfsOutput:
271	>	if tfsOutput in self.output_file:
272	>	common.logger.debug(5,"Output from TFileService "+tfsOutput+" already in output files")
273	>	else:
274	>	outfileflag = True #output found
275	>	self.output_file.append(tfsOutput)
276	>	common.logger.message("Adding "+tfsOutput+" to output files (from TFileService)")
277	>	pass
278	>	pass
279	>	## If present and requested, add PoolOutputModule to output files
280	>	if int(cfg_params.get('CMSSW.get_edm_output',0)):
281	>	edmOutput = PsetEdit.getPoolOutputModule()
282	>	if edmOutput:
283	>	if edmOutput in self.output_file:
284	>	common.logger.debug(5,"Output from PoolOutputModule "+edmOutput+" already in output files")
285	>	else:
286	>	self.output_file.append(edmOutput)
287	>	common.logger.message("Adding "+edmOutput+" to output files (from PoolOutputModule)")
288	>	pass
289	>	pass
290	>	except CrabException:
291	>	msg='Error while manipulating ParameterSet: exiting...'
292	>	raise CrabException(msg)
293	>	## Prepare inputSandbox TarBall (only the first time)
294	>	self.tgzNameWithPath = self.getTarBall(self.executable)
295
296		def DataDiscoveryAndLocation(self, cfg_params):
297
#	Line 285 \| Line 304 \| class Cmssw(JobType):
304		## Contact the DBS
305		common.logger.message("Contacting Data Discovery Services ...")
306		try:
307	<	self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
307	>	self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
308		self.pubdata.fetchDBSInfo()
309
310		except DataDiscovery.NotExistingDatasetError, ex :
#	Line 301 \| Line 320 \| class Cmssw(JobType):
320		self.filesbyblock=self.pubdata.getFiles()
321		self.eventsbyblock=self.pubdata.getEventsPerBlock()
322		self.eventsbyfile=self.pubdata.getEventsPerFile()
323	+	self.parentFiles=self.pubdata.getParent()
324
325		## get max number of events
326	<	self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py
326	>	self.maxEvents=self.pubdata.getMaxEvents()
327
328		## Contact the DLS and build a list of sites hosting the fileblocks
329		try:
#	Line 327 \| Line 347 \| class Cmssw(JobType):
347
348		return sites
349
330	–	# to Be Removed DS -- BL
331	–	# def setArgsList(self, argsList):
332	–	# self.argsList = argsList
333	–
350		def jobSplittingByBlocks(self, blockSites):
351		"""
352		Perform job splitting. Jobs run over an integer number of files
#	Line 421 \| Line 437 \| class Cmssw(JobType):
437
438		# ---- Iterate over the files in the block until we've met the requested ---- #
439		# ---- total # of events or we've gone over all the files in this block ---- #
440	+	pString=''
441		while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
442		file = files[fileCount]
443	+	if self.useParent:
444	+	parent = self.parentFiles[file]
445	+	for f in parent :
446	+	pString += '\\\"' + f + '\\\"\,'
447	+	common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
448	+	common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
449		if newFile :
450		try:
451		numEventsInFile = self.eventsbyfile[file]
#	Line 443 \| Line 466 \| class Cmssw(JobType):
466		# end job using last file, use remaining events in block
467		# close job and touch new file
468		fullString = parString[:-2]
469	<	list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
469	>	if self.useParent:
470	>	fullParentString = pString[:-2]
471	>	list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
472	>	else:
473	>	list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
474		common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
475		self.jobDestination.append(blockSites[block])
476		common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
#	Line 455 \| Line 482 \| class Cmssw(JobType):
482		eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
483		jobSkipEventCount = 0
484		# reset file
485	+	pString = ""
486		parString = ""
487		filesEventCount = 0
488		newFile = 1
#	Line 467 \| Line 495 \| class Cmssw(JobType):
495		elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
496		# close job and touch new file
497		fullString = parString[:-2]
498	<	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
498	>	if self.useParent:
499	>	fullParentString = pString[:-2]
500	>	list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
501	>	else:
502	>	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
503		common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
504		self.jobDestination.append(blockSites[block])
505		common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
#	Line 478 \| Line 510 \| class Cmssw(JobType):
510		eventsRemaining = eventsRemaining - eventsPerJobRequested
511		jobSkipEventCount = 0
512		# reset file
513	+	pString = ""
514		parString = ""
515		filesEventCount = 0
516		newFile = 1
#	Line 487 \| Line 520 \| class Cmssw(JobType):
520		else :
521		# close job but don't touch new file
522		fullString = parString[:-2]
523	<	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
523	>	if self.useParent:
524	>	fullParentString = pString[:-2]
525	>	list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
526	>	else:
527	>	list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
528		common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
529		self.jobDestination.append(blockSites[block])
530		common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
#	Line 501 \| Line 538 \| class Cmssw(JobType):
538		jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
539		# remove all but the last file
540		filesEventCount = self.eventsbyfile[file]
541	+	if self.useParent:
542	+	for f in parent : pString += '\\\"' + f + '\\\"\,'
543		parString = '\\\"' + file + '\\\"\,'
544		pass # END if
545		pass # END while (iterate over files in the block)
#	Line 609 \| Line 648 \| class Cmssw(JobType):
648		return
649
650
651	<	def jobSplittingForScript(self):#CarlosDaniele
651	>	def jobSplittingForScript(self):
652		"""
653		Perform job splitting based on number of jobs
654		"""
#	Line 625 \| Line 664 \| class Cmssw(JobType):
664		# argument is seed number.$i
665		self.list_of_args = []
666		for i in range(self.total_number_of_jobs):
628	–	## Since there is no input, any site is good
629	–	# self.jobDestination.append(["Any"])
667		self.jobDestination.append([""])
631	–	## no random seed
668		self.list_of_args.append([str(i)])
669		return
670
671	<	def split(self, jobParams):
671	>	def split(self, jobParams,firstJobID):
672
637	–	#### Fabio
673		njobs = self.total_number_of_jobs
674		arglist = self.list_of_args
675		# create the empty structure
#	Line 643 \| Line 678 \| class Cmssw(JobType):
678
679		listID=[]
680		listField=[]
681	<	for job in range(njobs):
682	<	jobParams[job] = arglist[job]
681	>	for id in range(njobs):
682	>	job = id + int(firstJobID)
683	>	jobParams[id] = arglist[id]
684		listID.append(job+1)
685		job_ToSave ={}
686		concString = ' '
687		argu=''
688	<	if len(jobParams[job]):
689	<	argu += concString.join(jobParams[job] )
690	<	job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS
691	<	job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
656	<	#common._db.updateJob_(job,job_ToSave)## new BL--DS
688	>	if len(jobParams[id]):
689	>	argu += concString.join(jobParams[id] )
690	>	job_ToSave['arguments']= str(job+1)+' '+argu
691	>	job_ToSave['dlsDestination']= self.jobDestination[id]
692		listField.append(job_ToSave)
693		msg="Job "+str(job)+" Arguments: "+str(job+1)+" "+argu+"\n" \
694	<	+" Destination: "+str(self.jobDestination[job])
694	>	+" Destination: "+str(self.jobDestination[id])
695		common.logger.debug(5,msg)
696	<	#common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
662	<	common._db.updateJob_(listID,listField)## new BL--DS
663	<	## Pay Attention Here....DS--BL
696	>	common._db.updateJob_(listID,listField)
697		self.argsList = (len(jobParams[0])+1)
698
699		return
700
701		def numberOfJobs(self):
669	–	# Fabio
702		return self.total_number_of_jobs
703
704		def getTarBall(self, exe):
705		"""
706		Return the TarBall with lib and exe
707		"""
676	–
677	–	# if it exist, just return it
678	–	#
679	–	# Marco. Let's start to use relative path for Boss XML files
680	–	#
708		self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
709		if os.path.exists(self.tgzNameWithPath):
710		return self.tgzNameWithPath
#	Line 691 \| Line 718 \| class Cmssw(JobType):
718
719		# First of all declare the user Scram area
720		swArea = self.scram.getSWArea_()
694	–	#print "swArea = ", swArea
695	–	# swVersion = self.scram.getSWVersion()
696	–	# print "swVersion = ", swVersion
721		swReleaseTop = self.scram.getReleaseTop_()
698	–	#print "swReleaseTop = ", swReleaseTop
722
723		## check if working area is release top
724		if swReleaseTop == '' or swArea == swReleaseTop:
#	Line 741 \| Line 764 \| class Cmssw(JobType):
764		tar.add(module,moduleDir)
765
766		## Now check if any data dir(s) is present
744	–	swAreaLen=len(swArea)
767		self.dataExist = False
768	<	for root, dirs, files in os.walk(swArea):
769	<	if "data" in dirs:
770	<	self.dataExist=True
771	<	common.logger.debug(5,"data "+root+"/data"+" to be tarred")
772	<	tar.add(root+"/data",root[swAreaLen:]+"/data")
768	>	todo_list = [(i, i) for i in os.listdir(swArea+"/src")]
769	>	while len(todo_list):
770	>	entry, name = todo_list.pop()
771	>	if name.startswith('crab_0_') or name.startswith('.') or name == 'CVS':
772	>	continue
773	>	if os.path.isdir(swArea+"/src/"+entry):
774	>	entryPath = entry + '/'
775	>	todo_list += [(entryPath + i, i) for i in os.listdir(swArea+"/src/"+entry)]
776	>	if name == 'data':
777	>	self.dataExist=True
778	>	common.logger.debug(5,"data "+entry+" to be tarred")
779	>	tar.add(swArea+"/src/"+entry,"src/"+entry)
780	>	pass
781	>	pass
782
783		### CMSSW ParameterSet
784		if not self.pset is None:
#	Line 757 \| Line 788 \| class Cmssw(JobType):
788
789
790		## Add ProdCommon dir to tar
791	<	prodcommonDir = 'ProdCommon'
792	<	prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
793	<	if os.path.isdir(prodcommonPath):
794	<	tar.add(prodcommonPath,prodcommonDir)
791	>	prodcommonDir = './'
792	>	prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
793	>	neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools','ProdCommon/Core','ProdCommon/MCPayloads', 'IMProv']
794	>	for file in neededStuff:
795	>	tar.add(prodcommonPath+file,prodcommonDir+file)
796		common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
797
798		##### ML stuff
#	Line 771 \| Line 803 \| class Cmssw(JobType):
803		common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
804
805		##### Utils
806	<	Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']
806	>	Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
807		for file in Utils_file_list:
808		tar.add(path+file,file)
809		common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
#	Line 782 \| Line 814 \| class Cmssw(JobType):
814		common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
815
816		tar.close()
817	<	except :
818	<	raise CrabException('Could not create tar-ball')
817	>	except IOError:
818	>	raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
819	>	except tarfile.TarError:
820	>	raise CrabException('Could not create tar-ball '+self.tgzNameWithPath)
821
822		## check for tarball size
823		tarballinfo = os.stat(self.tgzNameWithPath)
#	Line 797 \| Line 831 \| class Cmssw(JobType):
831		Returns part of a job script which prepares
832		the execution environment for the job 'nj'.
833		"""
834	+	if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
835	+	psetName = 'pset.py'
836	+	else:
837	+	psetName = 'pset.cfg'
838		# Prepare JobType-independent part
839		txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
840		txt += 'echo ">>> setup environment"\n'
#	Line 815 \| Line 853 \| class Cmssw(JobType):
853		txt += ' cd $WORKING_DIR\n'
854		txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
855		txt += self.wsSetupCMSOSGEnvironment_()
818	–	#txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
819	–	#txt += ' export SCRAM_ARCH='+self.executable_arch+'\n'
856		txt += 'fi\n'
857
858		# Prepare JobType-specific part
#	Line 855 \| Line 891 \| class Cmssw(JobType):
891
892		# Prepare job-specific part
893		job = common.job_list[nj]
858	–	### FEDE FOR DBS OUTPUT PUBLICATION
894		if (self.datasetPath):
895		txt += '\n'
896		txt += 'DatasetPath='+self.datasetPath+'\n'
#	Line 877 \| Line 912 \| class Cmssw(JobType):
912		txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
913		if (self.datasetPath): # standard job
914		txt += 'InputFiles=${args[1]}; export InputFiles\n'
915	<	txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
916	<	txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
915	>	if (self.useParent):
916	>	txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
917	>	txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
918	>	txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
919	>	else:
920	>	txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
921	>	txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
922		txt += 'echo "Inputfiles:<$InputFiles>"\n'
923	+	if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
924		txt += 'echo "MaxEvents:<$MaxEvents>"\n'
925		txt += 'echo "SkipEvents:<$SkipEvents>"\n'
926		else: # pythia like job
#	Line 891 \| Line 932 \| class Cmssw(JobType):
932		txt += 'FirstRun=${args[1]}; export FirstRun\n'
933		txt += 'echo "FirstRun: <$FirstRun>"\n'
934
935	<	txt += 'mv -f '+pset+' pset.cfg\n'
935	>	txt += 'mv -f ' + pset + ' ' + psetName + '\n'
936
937
938		if self.pset != None:
939	+	# FUTURE: Can simplify for 2_1_x and higher
940		txt += '\n'
941	<	txt += 'echo "*** cat pset.cfg *******"\n'
942	<	txt += 'cat pset.cfg\n'
943	<	txt += 'echo "**** end pset.cfg ******"\n'
944	<	txt += '\n'
945	<	txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
941	>	if self.debug_wrapper==True:
942	>	txt += 'echo "*** cat ' + psetName + ' *******"\n'
943	>	txt += 'cat ' + psetName + '\n'
944	>	txt += 'echo "**** end ' + psetName + ' ******"\n'
945	>	txt += '\n'
946	>	txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
947		txt += 'echo "PSETHASH = $PSETHASH" \n'
948		txt += '\n'
949		return txt
#	Line 916 \| Line 959 \| class Cmssw(JobType):
959		if os.path.isfile(self.tgzNameWithPath):
960		txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
961		txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
962	<	txt += 'ls -Al \n'
962	>	if self.debug_wrapper:
963	>	txt += 'ls -Al \n'
964		txt += 'untar_status=$? \n'
965		txt += 'if [ $untar_status -ne 0 ]; then \n'
966		txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
#	Line 926 \| Line 970 \| class Cmssw(JobType):
970		txt += ' echo "Successful untar" \n'
971		txt += 'fi \n'
972		txt += '\n'
973	<	txt += 'echo ">>> Include ProdCommon in PYTHONPATH:"\n'
973	>	txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
974		txt += 'if [ -z "$PYTHONPATH" ]; then\n'
975	<	txt += ' export PYTHONPATH=$RUNTIME_AREA/ProdCommon\n'
975	>	txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
976		txt += 'else\n'
977	<	txt += ' export PYTHONPATH=$RUNTIME_AREA/ProdCommon:${PYTHONPATH}\n'
977	>	txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
978		txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
979		txt += 'fi\n'
980		txt += '\n'
#	Line 951 \| Line 995 \| class Cmssw(JobType):
995		txt += 'rm -r lib/ module/ \n'
996		txt += 'mv $RUNTIME_AREA/lib/ . \n'
997		txt += 'mv $RUNTIME_AREA/module/ . \n'
998	<	if self.dataExist == True: txt += 'mv $RUNTIME_AREA/src/ . \n'
998	>	if self.dataExist == True:
999	>	txt += 'rm -r src/ \n'
1000	>	txt += 'mv $RUNTIME_AREA/src/ . \n'
1001		if len(self.additional_inbox_files)>0:
1002		for file in self.additional_inbox_files:
1003	<	txt += 'mv $RUNTIME_AREA/'+file+' . \n'
1004	<	txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1003	>	txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
1004	>	# txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
1005	>	# txt += 'mv $RUNTIME_AREA/IMProv/ . \n'
1006
1007	+	txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
1008		txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1009	<	txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon\n'
1009	>	txt += ' export PYTHONPATH=$RUNTIME_AREA/\n'
1010		txt += 'else\n'
1011	<	txt += ' export PYTHONPATH=$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1011	>	txt += ' export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
1012		txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1013		txt += 'fi\n'
1014		txt += '\n'
1015
1016		return txt
1017
970	–	def modifySteeringCards(self, nj):
971	–	"""
972	–	modify the card provided by the user,
973	–	writing a new card into share dir
974	–	"""
1018
1019		def executableName(self):
1020	<	if self.scriptExe: #CarlosDaniele
1020	>	if self.scriptExe:
1021		return "sh "
1022		else:
1023		return self.executable
#	Line 987 \| Line 1030 \| class Cmssw(JobType):
1030		ex_args = ""
1031		# FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
1032		# Framework job report
1033	<	if (self.major_version >= 1 and self.minor_version >= 5) or (self.major_version >= 2):
1033	>	if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
1034		ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
1035	<	# Type of cfg file
1036	<	if self.major_version >= 2 :
1035	>	# Type of config file
1036	>	if self.CMSSW_major >= 2 :
1037		ex_args += " -p pset.py"
1038		else:
1039		ex_args += " -p pset.cfg"
#	Line 1001 \| Line 1044 \| class Cmssw(JobType):
1044		Returns a list of filenames to be put in JDL input sandbox.
1045		"""
1046		inp_box = []
1004	–	# # dict added to delete duplicate from input sandbox file list
1005	–	# seen = {}
1006	–	## code
1047		if os.path.isfile(self.tgzNameWithPath):
1048		inp_box.append(self.tgzNameWithPath)
1049		wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
#	Line 1019 \| Line 1059 \| class Cmssw(JobType):
1059		## User Declared output files
1060		for out in (self.output_file+self.output_file_sandbox):
1061		n_out = nj + 1
1062	<	out_box.append(self.numberFile_(out,str(n_out)))
1062	>	out_box.append(numberFile(out,str(n_out)))
1063		return out_box
1064
1025	–	def prepareSteeringCards(self):
1026	–	"""
1027	–	Make initial modifications of the user's steering card file.
1028	–	"""
1029	–	return
1065
1066		def wsRenameOutput(self, nj):
1067		"""
#	Line 1036 \| Line 1071 \| class Cmssw(JobType):
1071		txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1072		txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1073		txt += 'echo ">>> current directory content:"\n'
1074	<	txt += 'ls \n'
1074	>	if self.debug_wrapper:
1075	>	txt += 'ls -Al\n'
1076		txt += '\n'
1077
1078		for fileWithSuffix in (self.output_file):
1079	<	output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1079	>	output_file_num = numberFile(fileWithSuffix, '$NJob')
1080		txt += '\n'
1081		txt += '# check output file\n'
1082		txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
#	Line 1061 \| Line 1097 \| class Cmssw(JobType):
1097		txt += 'fi\n'
1098		file_list = []
1099		for fileWithSuffix in (self.output_file):
1100	<	file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1100	>	file_list.append(numberFile(fileWithSuffix, '$NJob'))
1101
1102		txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1103		txt += '\n'
1104		txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1105		txt += 'echo ">>> current directory content:"\n'
1106	<	txt += 'ls \n'
1106	>	if self.debug_wrapper:
1107	>	txt += 'ls -Al\n'
1108		txt += '\n'
1109		txt += 'cd $RUNTIME_AREA\n'
1110		txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'
1111		return txt
1112
1076	–	def numberFile_(self, file, txt):
1077	–	"""
1078	–	append _'txt' before last extension of a file
1079	–	"""
1080	–	p = string.split(file,".")
1081	–	# take away last extension
1082	–	name = p[0]
1083	–	for x in p[1:-1]:
1084	–	name=name+"."+x
1085	–	# add "_txt"
1086	–	if len(p)>1:
1087	–	ext = p[len(p)-1]
1088	–	result = name + '_' + txt + "." + ext
1089	–	else:
1090	–	result = name + '_' + txt
1091	–
1092	–	return result
1093	–
1113		def getRequirements(self, nj=[]):
1114		"""
1115		return job requirements to add to jdl files
#	Line 1100 \| Line 1119 \| class Cmssw(JobType):
1119		req='Member("VO-cms-' + \
1120		self.version + \
1121		'", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1122	<	## SL add requirement for OS version only if SL4
1104	<	#reSL4 = re.compile( r'slc4' )
1105	<	if self.executable_arch: # and reSL4.search(self.executable_arch):
1122	>	if self.executable_arch:
1123		req+=' && Member("VO-cms-' + \
1124		self.executable_arch + \
1125		'", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
#	Line 1116 \| Line 1133 \| class Cmssw(JobType):
1133		def configFilename(self):
1134		""" return the config filename """
1135		# FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1136	<	if (self.major_version >= 2 and self.minor_version >= 1) or (self.major_version >= 3):
1136	>	if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1137		return self.name()+'.py'
1138		else:
1139		return self.name()+'.cfg'
#	Line 1145 \| Line 1162 \| class Cmssw(JobType):
1162
1163		return txt
1164
1148	–	### OLI_DANIELE
1165		def wsSetupCMSLCGEnvironment_(self):
1166		"""
1167		Returns part of a job script which prepares
#	Line 1180 \| Line 1196 \| class Cmssw(JobType):
1196		txt += ' echo "==> setup cms environment ok"\n'
1197		return txt
1198
1183	–	### FEDE FOR DBS OUTPUT PUBLICATION
1199		def modifyReport(self, nj):
1200		"""
1201		insert the part of the script that modifies the FrameworkJob Report
1202		"""
1188	–
1203		txt = '\n#Written by cms_cmssw::modifyReport\n'
1204		publish_data = int(self.cfg_params.get('USER.publish_data',0))
1205		if (publish_data == 1):
#	Line 1201 \| Line 1215 \| class Cmssw(JobType):
1215		txt += 'fi\n'
1216
1217		txt += 'echo ">>> Modify Job Report:" \n'
1218	<	txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1218	>	txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1219		txt += 'ProcessedDataset='+processedDataset+'\n'
1220		txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1221		txt += 'echo "SE = $SE"\n'
1222		txt += 'echo "SE_PATH = $SE_PATH"\n'
1223		txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1224		txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1225	<	txt += 'echo "$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1226	<	txt += '$SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1225	>	txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1226	>	txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py $RUNTIME_AREA/crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1227		txt += 'modifyReport_result=$?\n'
1228		txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1229		txt += ' modifyReport_result=70500\n'
#	Line 1221 \| Line 1235 \| class Cmssw(JobType):
1235		txt += 'fi\n'
1236		return txt
1237
def wsParseFJR(self):
    """
    Parse the FrameworkJobReport to obtain useful infos

    Builds and returns (as a string) the fragment of the job wrapper
    shell script that:
      * runs parseCrabFjr.py on $RUNTIME_AREA/crab_fjr_$NJob.xml, passing
        the dashboard ids and self.debugWrap on its command line;
      * extracts the executable exit status from the report
        (50115 and -999 are handled as special values by the script);
      * when self.datasetPath is set and self.dataset_pu == 'NONE',
        compares the list of input files with the list of files the
        report declares as processed and sets the exit status to 30001
        if they differ;
      * records the final exit status in $RUNTIME_AREA/$repo and in
        job_exit_code.

    Reads self.debugWrap, self.debug_wrapper, self.datasetPath and
    self.dataset_pu; does not modify any instance state.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
    if self.debug_wrapper:
        # Only echo the raw parser output when wrapper debugging is on.
        txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    # FIX: the job number is a shell variable; without the '$' the script
    # tried to cat a literal "crab_fjr_NJob.xml", which never exists.
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
    # FIX: message previously repeated "not available." twice.
    txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap

    if (self.datasetPath and self.dataset_pu == 'NONE'):
        # VERIFY PROCESSED DATA
        # The shell pipelines below must use a bare '|': an escaped '\|'
        # would be passed to the command as a literal argument.
        txt += ' if [ $executable_exit_status -eq 0 ];then\n'
        txt += ' echo ">>> Verify list of processed files:"\n'
        txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
        txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
        txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt input-files.txt\n'
        txt += ' echo "cat input-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat input-files.txt\n'
        txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' echo "cat processed-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' diff -q input-files.txt processed-files.txt\n'
        txt += ' fileverify_status=$?\n'
        txt += ' if [ $fileverify_status -ne 0 ]; then\n'
        txt += ' executable_exit_status=30001\n'
        txt += ' echo "ERROR ==> not all input files processed"\n'
        txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
        txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
        txt += ' fi\n'
        txt += ' fi\n'
        txt += '\n'
    # Always emitted: closes the outer "crab_fjr_$NJob.xml exists" test.
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1301	+
1302		def setParam_(self, param, value):
1303		self._params[param] = value
1304
#	Line 1247 \| Line 1325 \| class Cmssw(JobType):
1325		stderr = 'CMSSW_$NJob.stderr'
1326		if (self.return_data == 1):
1327		for file in (self.output_file+self.output_file_sandbox):
1328	<	listOutFiles.append(self.numberFile_(file, '$NJob'))
1328	>	listOutFiles.append(numberFile(file, '$NJob'))
1329		listOutFiles.append(stdout)
1330		listOutFiles.append(stderr)
1331		else:
1332		for file in (self.output_file_sandbox):
1333	<	listOutFiles.append(self.numberFile_(file, '$NJob'))
1333	>	listOutFiles.append(numberFile(file, '$NJob'))
1334		listOutFiles.append(stdout)
1335		listOutFiles.append(stderr)
1336		txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'

Diff Legend

–  Removed lines
+  Added lines
<  Changed lines (as in the older revision)
>  Changed lines (as in the newer revision)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents): Revision 1.183 by spiga, Wed Apr 30 18:21:07 2008 UTC vs. Revision 1.221 by fanzago, Wed Jun 18 14:02:42 2008 UTC

Diff Legend

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.183 by spiga, Wed Apr 30 18:21:07 2008 UTC vs.
Revision 1.221 by fanzago, Wed Jun 18 14:02:42 2008 UTC