
Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.13 by gutsche, Tue Jun 27 02:31:31 2006 UTC vs.
Revision 1.128 by fanzago, Thu Oct 11 16:23:44 2007 UTC

# Line 2 | Line 2 | from JobType import JobType
2   from crab_logger import Logger
3   from crab_exceptions import *
4   from crab_util import *
5 + from BlackWhiteListParser import BlackWhiteListParser
6   import common
6 import PsetManipulator  
7
8 import DBSInfo_EDM
9 import DataDiscovery_EDM
10 import DataLocation_EDM
7   import Scram
8  
9 < import os, string, re
9 > import os, string, glob
10  
11   class Cmssw(JobType):
12 <    def __init__(self, cfg_params):
12 >    def __init__(self, cfg_params, ncjobs):
13          JobType.__init__(self, 'CMSSW')
14          common.logger.debug(3,'CMSSW::__init__')
15  
20        self.analisys_common_info = {}
21        # Marco.
16          self._params = {}
17          self.cfg_params = cfg_params
18 +
19 +        # init BlackWhiteListParser
20 +        self.blackWhiteListParser = BlackWhiteListParser(cfg_params)
21 +
22 +        try:
23 +            self.MaxTarBallSize = float(self.cfg_params['EDG.maxtarballsize'])
24 +        except KeyError:
25 +            self.MaxTarBallSize = 9.5
26 +
27 +        # number of jobs requested to be created, limit for job splitting
28 +        self.ncjobs = ncjobs
29 +
30          log = common.logger
31          
32          self.scram = Scram.Scram(cfg_params)
27        scramArea = ''
33          self.additional_inbox_files = []
34          self.scriptExe = ''
35          self.executable = ''
36 +        self.executable_arch = self.scram.getArch()
37          self.tgz_name = 'default.tgz'
38 +        self.additional_tgz_name = 'additional.tgz'
39 +        self.scriptName = 'CMSSW.sh'
40 +        self.pset = ''      #script use case Da
41 +        self.datasetPath = '' #script use case Da
42  
43 +        # set FJR file name
44 +        self.fjrFileName = 'crab_fjr.xml'
45  
46          self.version = self.scram.getSWVersion()
47 +        
48 +        #
49 +        # Try to block creation in case of arch/version mismatch
50 +        #
51 +
52 +        a = string.split(self.version, "_")
53 +
54 +        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
55 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
56 +            raise CrabException(msg)
57 +        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
58 +            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
59 +            raise CrabException(msg)
60 +        
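
The guard above splits the release name (e.g. CMSSW_1_6_7) on underscores and refuses to create jobs when the SCRAM architecture does not match the release series (slc3 before 1_5, slc4 from 1_5 on). A standalone sketch of the same check, with illustrative version/arch strings:

    def check_arch(version, arch):
        # version looks like "CMSSW_1_6_7"; fields 1 and 2 are the series numbers tested above
        a = version.split("_")
        major, minor = int(a[1]), int(a[2])
        if major == 1 and minor < 5 and arch.startswith('slc4'):
            raise ValueError("CMS does not support %s with %s architecture" % (version, arch))
        if major == 1 and minor >= 5 and arch.startswith('slc3'):
            raise ValueError("CMS does not support %s with %s architecture" % (version, arch))

    check_arch("CMSSW_1_6_7", "slc4_ia32_gcc345")   # passes; an slc3 arch would raise
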
61 +        common.taskDB.setDict('codeVersion',self.version)
62          self.setParam_('application', self.version)
36        common.analisys_common_info['sw_version'] = self.version
37        ### FEDE
38        common.analisys_common_info['copy_input_data'] = 0
39        common.analisys_common_info['events_management'] = 1
63  
64          ### collect Data cards
65 +
66 +        ## get DBS mode
67 +        try:
68 +            self.use_dbs_1 = int(self.cfg_params['CMSSW.use_dbs_1'])
69 +        except KeyError:
70 +            self.use_dbs_1 = 0
71 +            
72          try:
73              tmp =  cfg_params['CMSSW.datasetpath']
74              log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
75              if string.lower(tmp)=='none':
76                  self.datasetPath = None
77 +                self.selectNoInput = 1
78              else:
79                  self.datasetPath = tmp
80 +                self.selectNoInput = 0
81          except KeyError:
82              msg = "Error: datasetpath not defined "  
83              raise CrabException(msg)
# Line 56 | Line 88 | class Cmssw(JobType):
88              self.setParam_('dataset', 'None')
89              self.setParam_('owner', 'None')
90          else:
91 <            datasetpath_split = self.datasetPath.split("/")
92 <            self.setParam_('dataset', datasetpath_split[1])
93 <            self.setParam_('owner', datasetpath_split[-1])
94 <
91 >            try:
92 >                datasetpath_split = self.datasetPath.split("/")
93 >                # standard style
94 >                self.setParam_('datasetFull', self.datasetPath)
95 >                if self.use_dbs_1 == 1 :
96 >                    self.setParam_('dataset', datasetpath_split[1])
97 >                    self.setParam_('owner', datasetpath_split[-1])
98 >                else:
99 >                    self.setParam_('dataset', datasetpath_split[1])
100 >                    self.setParam_('owner', datasetpath_split[2])
101 >            except:
102 >                self.setParam_('dataset', self.datasetPath)
103 >                self.setParam_('owner', self.datasetPath)
104 >                
105          self.setTaskid_()
106          self.setParam_('taskId', self.cfg_params['taskId'])
107  
# Line 82 | Line 124 | class Cmssw(JobType):
124          try:
125              self.pset = cfg_params['CMSSW.pset']
126              log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
127 <            if (not os.path.exists(self.pset)):
128 <                raise CrabException("User defined PSet file "+self.pset+" does not exist")
127 >            if self.pset.lower() != 'none' :
128 >                if (not os.path.exists(self.pset)):
129 >                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
130 >            else:
131 >                self.pset = None
132          except KeyError:
133              raise CrabException("PSet file missing. Cannot run cmsRun ")
134  
135          # output files
136 +        ## files that must always be returned via the sandbox
137 +        self.output_file_sandbox = []
138 +
139 +        # add fjr report by default via sandbox
140 +        self.output_file_sandbox.append(self.fjrFileName)
141 +
142 +        # other output files to be returned via sandbox or copied to SE
143          try:
144              self.output_file = []
93
145              tmp = cfg_params['CMSSW.output_file']
146              if tmp != '':
147                  tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
# Line 100 | Line 151 | class Cmssw(JobType):
151                      self.output_file.append(tmp)
152                      pass
153              else:
154 <                log.message("No output file defined: only stdout/err will be available")
154 >                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
155                  pass
156              pass
157          except KeyError:
158 <            log.message("No output file defined: only stdout/err will be available")
158 >            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available\n")
159              pass
160  
161          # script_exe file as additional file in inputSandbox
162          try:
163              self.scriptExe = cfg_params['USER.script_exe']
113            self.additional_inbox_files.append(self.scriptExe)
164              if self.scriptExe != '':
165                 if not os.path.isfile(self.scriptExe):
166 <                  msg ="WARNING. file "+self.scriptExe+" not found"
166 >                  msg ="ERROR. file "+self.scriptExe+" not found"
167                    raise CrabException(msg)
168 +               self.additional_inbox_files.append(string.strip(self.scriptExe))
169          except KeyError:
170 <           pass
171 <                  
170 >            self.scriptExe = ''
171 >
172 >        #CarlosDaniele
173 >        if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
174 +           msg = "Error. script_exe not defined"
175 >           raise CrabException(msg)
176 >
177          ## additional input files
178          try:
179 <            tmpAddFiles = string.split(cfg_params['CMSSW.additional_input_files'],',')
179 >            tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
180              for tmp in tmpAddFiles:
181 <                if not os.path.exists(tmp):
182 <                    raise CrabException("Additional input file not found: "+tmp)
183 <                tmp=string.strip(tmp)
184 <                self.additional_inbox_files.append(tmp)
181 >                tmp = string.strip(tmp)
182 >                dirname = ''
183 >                if not tmp[0]=="/": dirname = "."
184 >                files = []
185 >                if string.find(tmp,"*")>-1:
186 >                    files = glob.glob(os.path.join(dirname, tmp))
187 >                    if len(files)==0:
188 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
189 >                else:
190 >                    files.append(tmp)
191 >                for file in files:
192 >                    if not os.path.exists(file):
193 >                        raise CrabException("Additional input file not found: "+file)
194 >                    pass
195 >                    # fname = string.split(file, '/')[-1]
196 >                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
197 >                    # shutil.copyfile(file, storedFile)
198 >                    self.additional_inbox_files.append(string.strip(file))
199                  pass
200              pass
201 +            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
202          except KeyError:
203              pass
204  
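
The block above takes the comma-separated USER.additional_input_files list and expands shell-style wildcards (relative to the working directory for non-absolute entries) before adding the matches to the input sandbox. A simplified sketch of that expansion; the helper name and example patterns are invented:

    import glob, os

    def expand_additional_files(spec):
        # spec: the comma-separated value of USER.additional_input_files
        resolved = []
        for tmp in [s.strip() for s in spec.split(',')]:
            dirname = '' if tmp.startswith('/') else '.'
            if '*' in tmp:
                matches = glob.glob(os.path.join(dirname, tmp))
                if not matches:
                    raise RuntimeError("No additional input file found with this pattern: " + tmp)
            else:
                matches = [tmp]
            for f in matches:
                if not os.path.exists(f):
                    raise RuntimeError("Additional input file not found: " + f)
                resolved.append(f)
        return resolved

    # e.g. expand_additional_files('data/*.txt, myExtraFile.dat')
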
205          # files per job
206          try:
207 <            self.filesPerJob = int(cfg_params['CMSSW.files_per_jobs']) #Daniele
208 <            self.selectFilesPerJob = 1
207 >            if (cfg_params['CMSSW.files_per_jobs']):
208 >                raise CrabException("files_per_jobs no longer supported.  Quitting.")
209          except KeyError:
210 <            self.filesPerJob = 0
140 <            self.selectFilesPerJob = 0
210 >            pass
211  
212          ## Events per job
213          try:
# Line 147 | Line 217 | class Cmssw(JobType):
217              self.eventsPerJob = -1
218              self.selectEventsPerJob = 0
219      
220 <        # To be implemented
221 <        # ## number of jobs
222 <        # try:
223 <        #     self.numberOfJobs =int( cfg_params['CMSSW.number_of_job'])
224 <        #     self.selectNumberOfJobs = 1
225 <        # except KeyError:
226 <        #     self.selectNumberOfJobs = 0
157 <
158 <        if (self.selectFilesPerJob == self.selectEventsPerJob):
159 <            msg = 'Must define either files_per_jobs or events_per_job'
160 <            raise CrabException(msg)
220 >        ## number of jobs
221 >        try:
222 >            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
223 >            self.selectNumberOfJobs = 1
224 >        except KeyError:
225 >            self.theNumberOfJobs = 0
226 >            self.selectNumberOfJobs = 0
227  
162        if (self.selectEventsPerJob  and not self.datasetPath == None):
163            msg = 'Splitting according to events_per_job available only with None as datasetpath'
164            raise CrabException(msg)
165    
228          try:
229              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
230 +            self.selectTotalNumberEvents = 1
231          except KeyError:
232 <            msg = 'Must define total_number_of_events'
233 <            raise CrabException(msg)
234 <        
235 <        CEBlackList = []
232 >            self.total_number_of_events = 0
233 >            self.selectTotalNumberEvents = 0
234 >
235 >        if self.pset != None: #CarlosDaniele
236 >             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
237 >                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
238 >                 raise CrabException(msg)
239 >        else:
240 >             if (self.selectNumberOfJobs == 0):
241 >                 msg = 'Must specify number_of_jobs.'
242 >                 raise CrabException(msg)
243 >
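
With a pset, exactly two of the three splitting cards must be set so that the third can be derived; in the script-only case (pset None) only number_of_jobs is required. A minimal sketch of the two-of-three rule enforced above (helper name invented, card names as in crab.cfg):

    def check_splitting_cards(cmssw_cfg):
        # cmssw_cfg: dict with the [CMSSW] section of crab.cfg
        given = ['total_number_of_events' in cmssw_cfg,
                 'events_per_job' in cmssw_cfg,
                 'number_of_jobs' in cmssw_cfg]
        if sum(given) != 2:
            raise ValueError('Must define exactly two of total_number_of_events, '
                             'events_per_job, or number_of_jobs.')

    check_splitting_cards({'total_number_of_events': -1, 'number_of_jobs': 10})  # OK
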
244 >        ## source seed for pythia
245          try:
246 <            tmpBad = string.split(cfg_params['EDG.ce_black_list'],',')
175 <            for tmp in tmpBad:
176 <                tmp=string.strip(tmp)
177 <                CEBlackList.append(tmp)
246 >            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
247          except KeyError:
248 <            pass
248 >            self.sourceSeed = None
249 >            common.logger.debug(5,"No seed given")
250  
251 <        self.reCEBlackList=[]
252 <        for bad in CEBlackList:
183 <            self.reCEBlackList.append(re.compile( bad ))
184 <
185 <        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
186 <
187 <        CEWhiteList = []
188 <        try:
189 <            tmpGood = string.split(cfg_params['EDG.ce_white_list'],',')
190 <            for tmp in tmpGood:
191 <                tmp=string.strip(tmp)
192 <                CEWhiteList.append(tmp)
251 >        try:
252 >            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
253          except KeyError:
254 <            pass
254 >            self.sourceSeedVtx = None
255 >            common.logger.debug(5,"No vertex seed given")
256  
257 <        #print 'CEWhiteList: ',CEWhiteList
258 <        self.reCEWhiteList=[]
259 <        for Good in CEWhiteList:
260 <            self.reCEWhiteList.append(re.compile( Good ))
257 >        try:
258 >            self.sourceSeedG4 = int(cfg_params['CMSSW.g4_seed'])
259 >        except KeyError:
260 >            self.sourceSeedG4 = None
261 >            common.logger.debug(5,"No g4 sim hits seed given")
262  
263 <        common.logger.debug(5,'CEWhiteList: '+str(CEWhiteList))
263 >        try:
264 >            self.sourceSeedMix = int(cfg_params['CMSSW.mix_seed'])
265 >        except KeyError:
266 >            self.sourceSeedMix = None
267 >            common.logger.debug(5,"No mix seed given")
268  
269 <        self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
269 >        try:
270 >            self.firstRun = int(cfg_params['CMSSW.first_run'])
271 >        except KeyError:
272 >            self.firstRun = None
273 >            common.logger.debug(5,"No first run given")
274 >        if self.pset != None: #CarlosDaniele
275 >            ver = string.split(self.version,"_")
276 >            if (int(ver[1])>=1 and int(ver[2])>=5):
277 >                import PsetManipulator150 as pp
278 >            else:
279 >                import PsetManipulator as pp
280 >            PsetEdit = pp.PsetManipulator(self.pset) #Daniele Pset
281  
282          #DBSDLS-start
283          ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
284          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
285          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
286 +        self.jobDestination=[]  # Site destination(s) for each job (list of lists)
287          ## Perform the data location and discovery (based on DBS/DLS)
288          ## SL: Don't if NONE is specified as input (pythia use case)
289 <        common.analisys_common_info['sites']=None
289 >        blockSites = {}
290          if self.datasetPath:
291 <            self.DataDiscoveryAndLocation(cfg_params)
291 >            blockSites = self.DataDiscoveryAndLocation(cfg_params)
292          #DBSDLS-end          
293  
294          self.tgzNameWithPath = self.getTarBall(self.executable)
217
218        # modify Pset
219        if (self.datasetPath): # standard job
220            self.PsetEdit.maxEvent(self.eventsPerJob) #Daniele  
221            self.PsetEdit.inputModule("INPUT") #Daniele
222
223        else:  # pythia like job
224            self.PsetEdit.maxEvent(self.eventsPerJob) #Daniele  
225            self.PsetEdit.pythiaSeed("INPUT") #Daniele
226            try:
227                self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
228            except KeyError:
229                self.sourceSeed = 123456
230                common.logger.message("No seed given, will use "+str(self.sourceSeed))
231        
232        self.PsetEdit.psetWriter(self.configFilename())
295      
296          ## Select Splitting
297 <        if self.selectFilesPerJob: self.jobSplittingPerFiles()
298 <        elif self.selectEventsPerJob: self.jobSplittingPerEvents()
297 >        if self.selectNoInput:
298 >            if self.pset == None: #CarlosDaniele
299 >                self.jobSplittingForScript()
300 >            else:
301 >                self.jobSplittingNoInput()
302          else:
303 <            msg = 'Don\'t know how to split...'
239 <            raise CrabException(msg)
303 >            self.jobSplittingByBlocks(blockSites)
304  
305 +        # modify Pset
306 +        if self.pset != None: #CarlosDaniele
307 +            try:
308 +                if (self.datasetPath): # standard job
309 +                    # allow processing a fraction of events in a file
310 +                    PsetEdit.inputModule("INPUT")
311 +                    PsetEdit.maxEvent("INPUTMAXEVENTS")
312 +                    PsetEdit.skipEvent("INPUTSKIPEVENTS")
313 +                else:  # pythia like job
314 +                    PsetEdit.maxEvent(self.eventsPerJob)
315 +                    if (self.firstRun):
316 +                        PsetEdit.pythiaFirstRun("INPUTFIRSTRUN")  #First Run
317 +                    if (self.sourceSeed) :
318 +                        PsetEdit.pythiaSeed("INPUT")
319 +                        if (self.sourceSeedVtx) :
320 +                            PsetEdit.vtxSeed("INPUTVTX")
321 +                        if (self.sourceSeedG4) :
322 +                            PsetEdit.g4Seed("INPUTG4")
323 +                        if (self.sourceSeedMix) :
324 +                            PsetEdit.mixSeed("INPUTMIX")
325 +                # add FrameworkJobReport to parameter-set
326 +                PsetEdit.addCrabFJR(self.fjrFileName)
327 +                PsetEdit.psetWriter(self.configFilename())
328 +            except:
329 +                msg='Error while manipulating ParameterSet: exiting...'
330 +                raise CrabException(msg)
331  
332      def DataDiscoveryAndLocation(self, cfg_params):
333  
334 +        import DataDiscovery
335 +        import DataDiscovery_DBS2
336 +        import DataLocation
337          common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
338  
339          datasetPath=self.datasetPath
340  
248        ## TODO
249        dataTiersList = ""
250        dataTiers = dataTiersList.split(',')
251
341          ## Contact the DBS
342 +        common.logger.message("Contacting Data Discovery Services ...")
343          try:
344 <            self.pubdata=DataDiscovery_EDM.DataDiscovery_EDM(datasetPath, dataTiers, cfg_params)
344 >
345 >            if self.use_dbs_1 == 1 :
346 >                self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params)
347 >            else :
348 >                self.pubdata=DataDiscovery_DBS2.DataDiscovery_DBS2(datasetPath, cfg_params)
349              self.pubdata.fetchDBSInfo()
350  
351 <        except DataDiscovery_EDM.NotExistingDatasetError, ex :
351 >        except DataDiscovery.NotExistingDatasetError, ex :
352              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
353              raise CrabException(msg)
354 <
261 <        except DataDiscovery_EDM.NoDataTierinProvenanceError, ex :
354 >        except DataDiscovery.NoDataTierinProvenanceError, ex :
355              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
356              raise CrabException(msg)
357 <        except DataDiscovery_EDM.DataDiscoveryError, ex:
358 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
357 >        except DataDiscovery.DataDiscoveryError, ex:
358 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
359 >            raise CrabException(msg)
360 >        except DataDiscovery_DBS2.NotExistingDatasetError_DBS2, ex :
361 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
362 >            raise CrabException(msg)
363 >        except DataDiscovery_DBS2.NoDataTierinProvenanceError_DBS2, ex :
364 >            msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
365 >            raise CrabException(msg)
366 >        except DataDiscovery_DBS2.DataDiscoveryError_DBS2, ex:
367 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
368              raise CrabException(msg)
369  
370 <        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
371 <        ## self.DBSPaths=self.pubdata.getDBSPaths()
372 <        common.logger.message("Required data are :"+self.datasetPath)
271 <
272 <        filesbyblock=self.pubdata.getFiles()
273 <        self.AllInputFiles=filesbyblock.values()
274 <        self.files = self.AllInputFiles        
275 <
276 <        ## TEMP
277 <    #    self.filesTmp = filesbyblock.values()
278 <    #    self.files = []
279 <    #    locPath='rfio:cmsbose2.bo.infn.it:/flatfiles/SE00/cms/fanfani/ProdTest/'
280 <    #    locPath=''
281 <    #    tmp = []
282 <    #    for file in self.filesTmp[0]:
283 <    #        tmp.append(locPath+file)
284 <    #    self.files.append(tmp)
285 <        ## END TEMP
370 >        self.filesbyblock=self.pubdata.getFiles()
371 >        self.eventsbyblock=self.pubdata.getEventsPerBlock()
372 >        self.eventsbyfile=self.pubdata.getEventsPerFile()
373  
374          ## get max number of events
288        #common.logger.debug(10,"number of events for primary fileblocks %i"%self.pubdata.getMaxEvents())
375          self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
290        common.logger.message("\nThe number of available events is %s"%self.maxEvents)
376  
377          ## Contact the DLS and build a list of sites hosting the fileblocks
378          try:
379 <            dataloc=DataLocation_EDM.DataLocation_EDM(filesbyblock.keys(),cfg_params)
379 >            dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
380              dataloc.fetchDLSInfo()
381 <        except DataLocation_EDM.DataLocationError , ex:
381 >        except DataLocation.DataLocationError , ex:
382              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
383              raise CrabException(msg)
384          
300        allsites=dataloc.getSites()
301        common.logger.debug(5,"sites are %s"%allsites)
302        sites=self.checkBlackList(allsites)
303        common.logger.debug(5,"sites are (after black list) %s"%sites)
304        sites=self.checkWhiteList(sites)
305        common.logger.debug(5,"sites are (after white list) %s"%sites)
385  
386 <        if len(sites)==0:
387 <            msg = 'No sites hosting all the needed data! Exiting... '
388 <            raise CrabException(msg)
386 >        sites = dataloc.getSites()
387 >        allSites = []
388 >        listSites = sites.values()
389 >        for listSite in listSites:
390 >            for oneSite in listSite:
391 >                allSites.append(oneSite)
392 >        allSites = self.uniquelist(allSites)
393  
394 <        common.logger.message("List of Sites hosting the data : "+str(sites))
395 <        common.logger.debug(6, "List of Sites: "+str(sites))
396 <        common.analisys_common_info['sites']=sites    ## used in SchedulerEdg.py in createSchScript
397 <        self.setParam_('TargetCE', ','.join(sites))
315 <        return
394 >        # screen output
395 >        common.logger.message("Requested dataset: " + datasetPath + " has " + str(self.maxEvents) + " events in " + str(len(self.filesbyblock.keys())) + " blocks.\n")
396 >
397 >        return sites
398      
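
DataDiscoveryAndLocation() now returns the block-to-sites dictionary obtained from DLS; the loop above also flattens it (with uniquelist) into a duplicate-free list of every site hosting part of the dataset. A compact sketch of that flattening, with invented block and site names:

    def all_sites(block_sites):
        # block_sites: {block name: [sites hosting the block]}
        seen = []
        for sites in block_sites.values():
            for site in sites:
                if site not in seen:
                    seen.append(site)
        return seen

    print(all_sites({'/A/B/C#1': ['srm.cern.ch', 'cmssrm.fnal.gov'],
                     '/A/B/C#2': ['cmssrm.fnal.gov']}))
    # e.g. ['srm.cern.ch', 'cmssrm.fnal.gov']
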
399 <    def jobSplittingPerFiles(self):
318 <        """
319 <        Perform job splitting based on number of files to be accessed per job
399 >    def jobSplittingByBlocks(self, blockSites):
400          """
401 <        common.logger.debug(5,'Splitting per input files')
402 <        common.logger.message('Required '+str(self.filesPerJob)+' files per job ')
403 <        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
404 <
405 <        ## TODO: SL need to have (from DBS) a detailed list of how many events per each file
406 <        n_tot_files = (len(self.files[0]))
407 <        #print "n_tot_files = ", n_tot_files
408 <        ## SL: this is wrong if the files have different number of events
409 <        #print "self.maxEvents = ", self.maxEvents
410 <        evPerFile = int(self.maxEvents)/n_tot_files
411 <        #print "evPerFile = int(self.maxEvents)/n_tot_files =  ", evPerFile
412 <
413 <        common.logger.debug(5,'Events per File '+str(evPerFile))
414 <
415 <        ## if asked to process all events, do it
416 <        if self.total_number_of_events == -1:
417 <            self.total_number_of_events=self.maxEvents
418 <            self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
419 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for all available events '+str(self.total_number_of_events)+' events')
420 <        
401 >        Perform job splitting. Jobs run over an integer number of files
402 >        and no more than one block.
403 >        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
404 >        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberOfJobs,
405 >                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
406 >                  self.maxEvents, self.filesbyblock
407 >        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
408 >              self.total_number_of_jobs - Total # of jobs
409 >              self.list_of_args - File(s) job will run on (a list of lists)
410 >        """
411 >
412 >        # ---- Handle the possible job splitting configurations ---- #
413 >        if (self.selectTotalNumberEvents):
414 >            totalEventsRequested = self.total_number_of_events
415 >        if (self.selectEventsPerJob):
416 >            eventsPerJobRequested = self.eventsPerJob
417 >            if (self.selectNumberOfJobs):
418 >                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
419 >
420 >        # If user requested all the events in the dataset
421 >        if (totalEventsRequested == -1):
422 >            eventsRemaining=self.maxEvents
423 >        # If user requested more events than are in the dataset
424 >        elif (totalEventsRequested > self.maxEvents):
425 >            eventsRemaining = self.maxEvents
426 >            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
427 >        # If user requested fewer events than are in the dataset
428          else:
429 <            #print "self.total_number_of_events = ", self.total_number_of_events
430 <            #print "evPerFile = ", evPerFile
431 <            self.total_number_of_files = int(self.total_number_of_events/evPerFile)
432 <            #print "self.total_number_of_files = int(self.total_number_of_events/evPerFile) = " , self.total_number_of_files
433 <            ## SL: if ask for less event than what is computed to be available on a
434 <            ##     file, process the first file anyhow.
435 <            if self.total_number_of_files == 0:
436 <                self.total_number_of_files = self.total_number_of_files + 1
350 <                
429 >            eventsRemaining = totalEventsRequested
430 >
431 >        # If user requested more events per job than are in the dataset
432 >        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
433 >            eventsPerJobRequested = self.maxEvents
434 >
435 >        # For user info at end
436 >        totalEventCount = 0
437  
438 <            common.logger.debug(5,'N files  '+str(self.total_number_of_files))
438 >        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
439 >            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
440  
441 <            check = 0
441 >        if (self.selectNumberOfJobs):
442 >            common.logger.message("May not create the exact number_of_jobs requested.")
443 >
444 >        if ( self.ncjobs == 'all' ) :
445 >            totalNumberOfJobs = 999999999
446 >        else :
447 >            totalNumberOfJobs = self.ncjobs
448              
356            ## Compute the number of jobs
357            #self.total_number_of_jobs = int(n_tot_files)*1/int(self.filesPerJob)
358            #print "self.total_number_of_files = ", self.total_number_of_files
359            #print "self.filesPerJob = ", self.filesPerJob
360            self.total_number_of_jobs = int(self.total_number_of_files/self.filesPerJob)
361            #print "self.total_number_of_jobs = ", self.total_number_of_jobs
362            common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
363
364            ## is there any remainder?
365            check = int(self.total_number_of_files) - (int(self.total_number_of_jobs)*self.filesPerJob)
366
367            common.logger.debug(5,'Check  '+str(check))
368
369            if check > 0:
370                self.total_number_of_jobs =  self.total_number_of_jobs + 1
371                common.logger.message('Warning: last job will be created with '+str(check)+' files')
449  
450 <            #common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs-1)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
451 <            common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str((self.total_number_of_jobs)*self.filesPerJob*evPerFile + check*evPerFile)+' events')
452 <            pass
450 >        blocks = blockSites.keys()
451 >        blockCount = 0
452 >        # Backup variable in case self.maxEvents counted events in a non-included block
453 >        numBlocksInDataset = len(blocks)
454  
455 +        jobCount = 0
456          list_of_lists = []
457 <        for i in xrange(0, int(n_tot_files), self.filesPerJob):
458 <            parString = "\\{"
457 >
458 >        # list tracking which jobs belong to which block
459 >        jobsOfBlock = {}
460 >
461 >        # ---- Iterate over the blocks in the dataset until ---- #
462 >        # ---- we've met the requested total # of events    ---- #
463 >        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
464 >            block = blocks[blockCount]
465 >            blockCount += 1
466 >            if block not in jobsOfBlock.keys() :
467 >                jobsOfBlock[block] = []
468              
469 <            params = self.files[0][i: i+self.filesPerJob]
470 <            for i in range(len(params) - 1):
471 <                parString += '\\\"' + params[i] + '\\\"\,'
469 >            if self.eventsbyblock.has_key(block) :
470 >                numEventsInBlock = self.eventsbyblock[block]
471 >                common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
472              
473 <            parString += '\\\"' + params[len(params) - 1] + '\\\"\\}'
474 <            list_of_lists.append(parString)
475 <            pass
473 >                files = self.filesbyblock[block]
474 >                numFilesInBlock = len(files)
475 >                if (numFilesInBlock <= 0):
476 >                    continue
477 >                fileCount = 0
478 >
479 >                # ---- New block => New job ---- #
480 >                parString = "\\{"
481 >                # counter for number of events in files currently worked on
482 >                filesEventCount = 0
483 >                # flag if next while loop should touch new file
484 >                newFile = 1
485 >                # number of events to skip at the start of the job
486 >                jobSkipEventCount = 0
487 >            
488 >                # ---- Iterate over the files in the block until we've met the requested ---- #
489 >                # ---- total # of events or we've gone over all the files in this block  ---- #
490 >                while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
491 >                    file = files[fileCount]
492 >                    if newFile :
493 >                        try:
494 >                            numEventsInFile = self.eventsbyfile[file]
495 >                            common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
496 >                            # increase filesEventCount
497 >                            filesEventCount += numEventsInFile
498 >                            # Add file to current job
499 >                            parString += '\\\"' + file + '\\\"\,'
500 >                            newFile = 0
501 >                        except KeyError:
502 >                            common.logger.message("File "+str(file)+" has unknown number of events: skipping")
503 >                        
504 >
505 >                    # if less events in file remain than eventsPerJobRequested
506 >                    if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
507 >                        # if last file in block
508 >                        if ( fileCount == numFilesInBlock-1 ) :
509 >                            # end job using last file, use remaining events in block
510 >                            # close job and touch new file
511 >                            fullString = parString[:-2]
512 >                            fullString += '\\}'
513 >                            list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
514 >                            common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
515 >                            self.jobDestination.append(blockSites[block])
516 >                            common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
517 >                            # fill jobs of block dictionary
518 >                            jobsOfBlock[block].append(jobCount+1)
519 >                            # reset counter
520 >                            jobCount = jobCount + 1
521 >                            totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
522 >                            eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
523 >                            jobSkipEventCount = 0
524 >                            # reset file
525 >                            parString = "\\{"
526 >                            filesEventCount = 0
527 >                            newFile = 1
528 >                            fileCount += 1
529 >                        else :
530 >                            # go to next file
531 >                            newFile = 1
532 >                            fileCount += 1
533 >                    # if events in file equal to eventsPerJobRequested
534 >                    elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
535 >                        # close job and touch new file
536 >                        fullString = parString[:-2]
537 >                        fullString += '\\}'
538 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
539 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
540 >                        self.jobDestination.append(blockSites[block])
541 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
542 >                        jobsOfBlock[block].append(jobCount+1)
543 >                        # reset counter
544 >                        jobCount = jobCount + 1
545 >                        totalEventCount = totalEventCount + eventsPerJobRequested
546 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
547 >                        jobSkipEventCount = 0
548 >                        # reset file
549 >                        parString = "\\{"
550 >                        filesEventCount = 0
551 >                        newFile = 1
552 >                        fileCount += 1
553 >                        
554 >                    # if more events in file remain than eventsPerJobRequested
555 >                    else :
556 >                        # close job but don't touch new file
557 >                        fullString = parString[:-2]
558 >                        fullString += '\\}'
559 >                        list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
560 >                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
561 >                        self.jobDestination.append(blockSites[block])
562 >                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
563 >                        jobsOfBlock[block].append(jobCount+1)
564 >                        # increase counter
565 >                        jobCount = jobCount + 1
566 >                        totalEventCount = totalEventCount + eventsPerJobRequested
567 >                        eventsRemaining = eventsRemaining - eventsPerJobRequested
568 >                        # calculate skip events for last file
569 >                        # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequested
570 >                        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
571 >                        # remove all but the last file
572 >                        filesEventCount = self.eventsbyfile[file]
573 >                        parString = "\\{"
574 >                        parString += '\\\"' + file + '\\\"\,'
575 >                    pass # END if
576 >                pass # END while (iterate over files in the block)
577 >        pass # END while (iterate over blocks in the dataset)
578 >        self.ncjobs = self.total_number_of_jobs = jobCount
579 >        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
580 >            common.logger.message("Could not run on all requested events because some blocks are not hosted at allowed sites.")
581 >        common.logger.message(str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
582 >        
583 >        # screen output
584 >        screenOutput = "List of jobs and available destination sites:\n\n"
585 >
586 >        # keep track of blocks with no sites, to print a warning at the end
587 >        noSiteBlock = []
588 >        bloskNoSite = []
589 >
590 >        blockCounter = 0
591 >        for block in blocks:
592 >            if block in jobsOfBlock.keys() :
593 >                blockCounter += 1
594 >                screenOutput += "Block %5i: jobs %20s: sites: %s\n" % (blockCounter,spanRanges(jobsOfBlock[block]),','.join(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)))
595 >                if len(self.blackWhiteListParser.checkWhiteList(self.blackWhiteListParser.checkBlackList(blockSites[block],block),block)) == 0:
596 >                    noSiteBlock.append( spanRanges(jobsOfBlock[block]) )
597 >                    bloskNoSite.append( blockCounter )
598 >        
599 >        common.logger.message(screenOutput)
600 >        if len(noSiteBlock) > 0 and len(bloskNoSite) > 0:
601 >            msg = 'WARNING: No sites are hosting any part of data for block:\n                '
602 >            virgola = ""
603 >            if len(bloskNoSite) > 1:
604 >                virgola = ","
605 >            for block in bloskNoSite:
606 >                msg += ' ' + str(block) + virgola
607 >            msg += '\n               Related jobs:\n                 '
608 >            virgola = ""
609 >            if len(noSiteBlock) > 1:
610 >                virgola = ","
611 >            for range_jobs in noSiteBlock:
612 >                msg += str(range_jobs) + virgola
613 >            msg += '\n               will not be submitted and this block of data can not be analyzed!\n'
614 >            common.logger.message(msg)
615  
616          self.list_of_args = list_of_lists
390        #print self.list_of_args
617          return
618  
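
jobSplittingByBlocks() packs whole files into jobs without ever crossing a block boundary; each entry of list_of_args carries the file list, the number of events to process, and the number of events to skip at the start. A heavily simplified, self-contained sketch of the per-block packing (toy file names and event counts; the real code also handles files with unknown event counts, per-job destinations and the escaped parameter string):

    def split_block(files_events, events_per_job):
        # files_events: ordered (file name, events in file) pairs for one block
        jobs = []                              # each job: (files, max events, skip events)
        par, files_ev, skip = [], 0, 0
        i = 0
        while i < len(files_events):
            fname, nev = files_events[i]
            if not par or par[-1] != fname:    # pull the file into the current job once
                par.append(fname)
                files_ev += nev
            remaining = files_ev - skip
            if remaining < events_per_job:
                if i == len(files_events) - 1 and remaining > 0:
                    jobs.append((par, remaining, skip))   # last file: close with what is left
                i += 1                         # otherwise just take the next file
            elif remaining == events_per_job:  # job filled exactly: close it, move on
                jobs.append((par, events_per_job, skip))
                par, files_ev, skip = [], 0, 0
                i += 1
            else:                              # file holds more than the job needs
                jobs.append((par, events_per_job, skip))
                skip = events_per_job - (files_ev - skip - nev)
                par, files_ev = [fname], nev   # next job re-reads this file with a skip
        return jobs

    print(split_block([('f1.root', 100), ('f2.root', 50)], 60))
    # [(['f1.root'], 60, 0), (['f1.root', 'f2.root'], 60, 60), (['f2.root'], 30, 20)]
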
619 <    def jobSplittingPerEvents(self):
619 >    def jobSplittingNoInput(self):
620          """
621          Perform job splitting based on the number of events per job
622          """
623          common.logger.debug(5,'Splitting per events')
624          common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
625 +        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
626          common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
627  
628          if (self.total_number_of_events < 0):
629              msg='Cannot split jobs per Events with "-1" as total number of events'
630              raise CrabException(msg)
631  
632 <        self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
633 <
634 <        print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
635 <        print "self.total_number_of_events = ", self.total_number_of_events
636 <        print "self.eventsPerJob = ", self.eventsPerJob
637 <        print "self.total_number_of_jobs = ", self.total_number_of_jobs
638 <        print "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
639 <        
632 >        if (self.selectEventsPerJob):
633 >            if (self.selectTotalNumberEvents):
634 >                self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
635 >            elif(self.selectNumberOfJobs) :  
636 >                self.total_number_of_jobs =self.theNumberOfJobs
637 >                self.total_number_of_events =int(self.theNumberOfJobs*self.eventsPerJob)
638 >
639 >        elif (self.selectNumberOfJobs) :
640 >            self.total_number_of_jobs = self.theNumberOfJobs
641 >            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
642 >
643          common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
644  
645          # is there any remainder?
# Line 417 | Line 647 | class Cmssw(JobType):
647  
648          common.logger.debug(5,'Check  '+str(check))
649  
650 +        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each running on '+str(self.eventsPerJob)+' events, for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
651          if check > 0:
652 <            common.logger.message('Warning: asked '+self.total_number_of_events+' but will do only '+(int(self.total_number_of_jobs)*self.eventsPerJob))
422 <
423 <        common.logger.message(str(self.total_number_of_jobs)+' jobs will be created for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
652 >            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
653  
654          # argument is seed number.$i
655          self.list_of_args = []
656          for i in range(self.total_number_of_jobs):
657 <            self.list_of_args.append(int(str(self.sourceSeed)+str(i)))
658 <        print self.list_of_args
657 >            ## Since there is no input, any site is good
658 >           # self.jobDestination.append(["Any"])
659 >            self.jobDestination.append([""]) #must be empty to write correctly the xml
660 >            args=[]
661 >            if (self.firstRun):
662 >                    ## pythia first run
663 >                #self.list_of_args.append([(str(self.firstRun)+str(i))])
664 >                args.append(str(self.firstRun)+str(i))
665 >            else:
666 >                ## no first run
667 >                #self.list_of_args.append([str(i)])
668 >                args.append(str(i))
669 >            if (self.sourceSeed):
670 >                args.append(str(self.sourceSeed)+str(i))
671 >                if (self.sourceSeedVtx):
672 >                    ## + vtx random seed
673 >                    args.append(str(self.sourceSeedVtx)+str(i))
674 >                if (self.sourceSeedG4):
675 >                    ## + G4 random seed
676 >                    args.append(str(self.sourceSeedG4)+str(i))
677 >                if (self.sourceSeedMix):    
678 >                    ## + Mix random seed
679 >                    args.append(str(self.sourceSeedMix)+str(i))
680 >                pass
681 >            pass
682 >            self.list_of_args.append(args)
683 >        pass
684 >            
685 >        # print self.list_of_args
686 >
687 >        return
688 >
689 >
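
For runs without input data the argument list is built per job from the optional first-run number (or just the job index) plus any configured pythia, vertex, G4 and mixing seeds, each with the job index appended. A condensed sketch of that construction; the seed values below are arbitrary:

    def no_input_args(job_index, first_run=None, seed=None, vtx=None, g4=None, mix=None):
        args = []
        # first argument: first-run number, or just the job index if none was given
        args.append(str(first_run) + str(job_index) if first_run else str(job_index))
        if seed:
            args.append(str(seed) + str(job_index))
            if vtx: args.append(str(vtx) + str(job_index))
            if g4:  args.append(str(g4) + str(job_index))
            if mix: args.append(str(mix) + str(job_index))
        return args

    print(no_input_args(3, first_run=100, seed=12345, vtx=98765))
    # ['1003', '123453', '987653']
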
690 >    def jobSplittingForScript(self):#CarlosDaniele
691 >        """
692 >        Perform job splitting based on the requested number of jobs
693 >        """
694 >        common.logger.debug(5,'Splitting per job')
695 >        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
696  
697 +        self.total_number_of_jobs = self.theNumberOfJobs
698 +
699 +        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
700 +
701 +        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
702 +
703 +        # argument is seed number.$i
704 +        self.list_of_args = []
705 +        for i in range(self.total_number_of_jobs):
706 +            ## Since there is no input, any site is good
707 +           # self.jobDestination.append(["Any"])
708 +            self.jobDestination.append([""])
709 +            ## no random seed
710 +            self.list_of_args.append([str(i)])
711          return
712  
713      def split(self, jobParams):
# Line 441 | Line 721 | class Cmssw(JobType):
721              jobParams.append("")
722          
723          for job in range(njobs):
724 <            jobParams[job] = str(arglist[job])
724 >            jobParams[job] = arglist[job]
725 >            # print str(arglist[job])
726 >            # print jobParams[job]
727              common.jobDB.setArguments(job, jobParams[job])
728 +            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
729 +            common.jobDB.setDestination(job, self.jobDestination[job])
730  
731          common.jobDB.save()
732          return
733      
734      def getJobTypeArguments(self, nj, sched):
735 <        return common.jobDB.arguments(nj)
735 >        result = ''
736 >        for i in common.jobDB.arguments(nj):
737 >            result=result+str(i)+" "
738 >        return result
739    
740      def numberOfJobs(self):
741          # Fabio
742          return self.total_number_of_jobs
743  
457    def checkBlackList(self, allSites):
458        if len(self.reCEBlackList)==0: return allSites
459        sites = []
460        for site in allSites:
461            common.logger.debug(10,'Site '+site)
462            good=1
463            for re in self.reCEBlackList:
464                if re.search(site):
465                    common.logger.message('CE in black list, skipping site '+site)
466                    good=0
467                pass
468            if good: sites.append(site)
469        if len(sites) == 0:
470            common.logger.debug(3,"No sites found after BlackList")
471        return sites
472
473    def checkWhiteList(self, allSites):
474
475        if len(self.reCEWhiteList)==0: return allSites
476        sites = []
477        for site in allSites:
478            good=0
479            for re in self.reCEWhiteList:
480                if re.search(site):
481                    common.logger.debug(5,'CE in white list, adding site '+site)
482                    good=1
483                if not good: continue
484                sites.append(site)
485        if len(sites) == 0:
486            common.logger.message("No sites found after WhiteList\n")
487        else:
488            common.logger.debug(5,"Selected sites via WhiteList are "+str(sites)+"\n")
489        return sites
490
744      def getTarBall(self, exe):
745          """
746          Return the TarBall with lib and exe
747          """
748          
749          # if it exist, just return it
750 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
750 >        #
751 >        # Marco. Let's start to use relative path for Boss XML files
752 >        #
753 >        self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
754          if os.path.exists(self.tgzNameWithPath):
755              return self.tgzNameWithPath
756  
# Line 508 | Line 764 | class Cmssw(JobType):
764          # First of all declare the user Scram area
765          swArea = self.scram.getSWArea_()
766          #print "swArea = ", swArea
767 <        swVersion = self.scram.getSWVersion()
768 <        #print "swVersion = ", swVersion
767 >        # swVersion = self.scram.getSWVersion()
768 >        # print "swVersion = ", swVersion
769          swReleaseTop = self.scram.getReleaseTop_()
770          #print "swReleaseTop = ", swReleaseTop
771          
# Line 517 | Line 773 | class Cmssw(JobType):
773          if swReleaseTop == '' or swArea == swReleaseTop:
774              return
775  
776 <        filesToBeTarred = []
777 <        ## First find the executable
778 <        if (self.executable != ''):
779 <            exeWithPath = self.scram.findFile_(executable)
780 < #           print exeWithPath
781 <            if ( not exeWithPath ):
782 <                raise CrabException('User executable '+executable+' not found')
783 <
784 <            ## then check if it's private or not
785 <            if exeWithPath.find(swReleaseTop) == -1:
786 <                # the exe is private, so we must ship
787 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
788 <                path = swArea+'/'
789 <                exe = string.replace(exeWithPath, path,'')
790 <                filesToBeTarred.append(exe)
791 <                pass
792 <            else:
793 <                # the exe is from release, we'll find it on WN
794 <                pass
795 <
796 <        ## Now get the libraries: only those in local working area
797 <        libDir = 'lib'
798 <        lib = swArea+'/' +libDir
799 <        common.logger.debug(5,"lib "+lib+" to be tarred")
800 <        if os.path.exists(lib):
801 <            filesToBeTarred.append(libDir)
802 <
803 <        ## Now check if module dir is present
804 <        moduleDir = 'module'
805 <        if os.path.isdir(swArea+'/'+moduleDir):
806 <            filesToBeTarred.append(moduleDir)
807 <
808 <        ## Now check if the Data dir is present
809 <        dataDir = 'src/Data/'
810 <        if os.path.isdir(swArea+'/'+dataDir):
811 <            filesToBeTarred.append(dataDir)
812 <
813 <        ## Create the tar-ball
814 <        if len(filesToBeTarred)>0:
815 <            cwd = os.getcwd()
816 <            os.chdir(swArea)
817 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
818 <            for line in filesToBeTarred:
819 <                tarcmd = tarcmd + line + ' '
820 <            cout = runCommand(tarcmd)
821 <            if not cout:
822 <                raise CrabException('Could not create tar-ball')
823 <            os.chdir(cwd)
824 <        else:
825 <            common.logger.debug(5,"No files to be to be tarred")
776 >        import tarfile
777 >        try: # create tar ball
778 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
779 >            ## First find the executable
780 >            if (self.executable != ''):
781 >                exeWithPath = self.scram.findFile_(executable)
782 >                if ( not exeWithPath ):
783 >                    raise CrabException('User executable '+executable+' not found')
784 >    
785 >                ## then check if it's private or not
786 >                if exeWithPath.find(swReleaseTop) == -1:
787 >                    # the exe is private, so we must ship
788 >                    common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
789 >                    path = swArea+'/'
790 >                    # distinguish case when script is in user project area or given by full path somewhere else
791 >                    if exeWithPath.find(path) >= 0 :
792 >                        exe = string.replace(exeWithPath, path,'')
793 >                        tar.add(path+exe,os.path.basename(executable))
794 >                    else :
795 >                        tar.add(exeWithPath,os.path.basename(executable))
796 >                    pass
797 >                else:
798 >                    # the exe is from release, we'll find it on WN
799 >                    pass
800 >    
801 >            ## Now get the libraries: only those in local working area
802 >            libDir = 'lib'
803 >            lib = swArea+'/' +libDir
804 >            common.logger.debug(5,"lib "+lib+" to be tarred")
805 >            if os.path.exists(lib):
806 >                tar.add(lib,libDir)
807 >    
808 >            ## Now check if module dir is present
809 >            moduleDir = 'module'
810 >            module = swArea + '/' + moduleDir
811 >            if os.path.isdir(module):
812 >                tar.add(module,moduleDir)
813 >
814 >            ## Now check if any data dir(s) is present
815 >            swAreaLen=len(swArea)
816 >            for root, dirs, files in os.walk(swArea):
817 >                if "data" in dirs:
818 >                    common.logger.debug(5,"data "+root+"/data"+" to be tarred")
819 >                    tar.add(root+"/data",root[swAreaLen:]+"/data")
820 >
821 >            ## Add ProdAgent dir to tar
822 >            paDir = 'ProdAgentApi'
823 >            pa = os.environ['CRABDIR'] + '/' + 'ProdAgentApi'
824 >            if os.path.isdir(pa):
825 >                tar.add(pa,paDir)
826 >
827 >            ### FEDE FOR DBS PUBLICATION
828 >            ## Add PRODCOMMON dir to tar
829 >            prodcommonDir = 'ProdCommon'
830 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
831 >            if os.path.isdir(prodcommonPath):
832 >                tar.add(prodcommonPath,prodcommonDir)
833 >            #############################    
834 >        
835 >            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
836 >            tar.close()
837 >        except :
838 >            raise CrabException('Could not create tar-ball')
839 >
840 >        ## check for tarball size
841 >        tarballinfo = os.stat(self.tgzNameWithPath)
842 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
843 >            raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
844 >
845 >        ## create tar-ball with ML stuff
846 >        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
847 >        try:
848 >            tar = tarfile.open(self.MLtgzfile, "w:gz")
849 >            path=os.environ['CRABDIR'] + '/python/'
850 >            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py']:
851 >                tar.add(path+file,file)
852 >            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
853 >            tar.close()
854 >        except :
855 >            raise CrabException('Could not create ML files tar-ball')
856          
857          return
858          
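
The sandbox is now assembled with Python's tarfile module instead of an external tar command, and its final size is checked against the EDG.maxtarballsize limit (9.5 MB by default). A minimal sketch of that pattern; the helper name and entry list are illustrative:

    import os, tarfile

    def make_sandbox(tgz_path, entries, max_mb=9.5):
        # entries: (path on disk, name inside the tarball) pairs
        tar = tarfile.open(tgz_path, "w:gz")
        for path, arcname in entries:
            tar.add(path, arcname)
        tar.close()
        size_mb = os.stat(tgz_path).st_size / 1024.0 / 1024.0
        if size_mb > max_mb:
            raise RuntimeError("Input sandbox size of %.1f MB is larger than the allowed %.1f MB limit" % (size_mb, max_mb))
        return tgz_path

    # e.g. make_sandbox('default.tgz', [('lib', 'lib'), ('module', 'module')])
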
859 +    def additionalInputFileTgz(self):
860 +        """
861 +        Put all additional files into a tar ball and return its name
862 +        """
863 +        import tarfile
864 +        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
865 +        tar = tarfile.open(tarName, "w:gz")
866 +        for file in self.additional_inbox_files:
867 +            tar.add(file,string.split(file,'/')[-1])
868 +        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
869 +        tar.close()
870 +        return tarName
871 +
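additionalInputFileTgz() stores each additional input file under its basename, so the worker node sees a flat layout after untarring. A sketch of the same idea (file names are illustrative):

    import os, tarfile

    additional_inbox_files = ['/home/user/cards/weights.txt', 'extra.cfg']   # illustrative
    tar = tarfile.open('additional.tgz', 'w:gz')
    for f in additional_inbox_files:
        # keep only the basename as the archive member name
        tar.add(f, os.path.basename(f))
    tar.close()
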
872      def wsSetupEnvironment(self, nj):
873          """
874          Returns part of a job script which prepares
# Line 581 | Line 880 | class Cmssw(JobType):
880          ## OLI_Daniele at this level  middleware already known
881  
882          txt += 'if [ $middleware == LCG ]; then \n'
883 +        txt += '    echo "### First set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
884 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
885 +        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
886          txt += self.wsSetupCMSLCGEnvironment_()
887          txt += 'elif [ $middleware == OSG ]; then\n'
888 <        txt += '    time=`date -u +"%s"`\n'
889 <        txt += '    WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
588 <        txt += '    echo "Creating working directory: $WORKING_DIR"\n'
589 <        txt += '    /bin/mkdir -p $WORKING_DIR\n'
888 >        txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
889 >        txt += '    echo "Created working directory: $WORKING_DIR"\n'
890          txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
891          txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
892 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
893 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
892 >        txt += '    echo "JOB_EXIT_STATUS = 10016"\n'
893 >        txt += '    echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
894 >        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
895          txt += '        rm -f $RUNTIME_AREA/$repo \n'
896          txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
597        txt += '        echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
897          txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
898          txt += '        exit 1\n'
899          txt += '    fi\n'
# Line 602 | Line 901 | class Cmssw(JobType):
901          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
902          txt += '    cd $WORKING_DIR\n'
903          txt += self.wsSetupCMSOSGEnvironment_()
904 +        txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
905 +        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
906          txt += 'fi\n'
907  
908          # Prepare JobType-specific part
# Line 617 | Line 918 | class Cmssw(JobType):
918          txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
919          txt += '   rm -f $RUNTIME_AREA/$repo \n'
920          txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
620        txt += '   echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
921          txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
922          ## OLI_Daniele
923          txt += '    if [ $middleware == OSG ]; then \n'
# Line 625 | Line 925 | class Cmssw(JobType):
925          txt += '        cd $RUNTIME_AREA\n'
926          txt += '        /bin/rm -rf $WORKING_DIR\n'
927          txt += '        if [ -d $WORKING_DIR ] ;then\n'
928 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
928 >        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
929 >        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
930 >        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
931 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
932          txt += '            rm -f $RUNTIME_AREA/$repo \n'
933          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
634        txt += '            echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
934          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
935          txt += '        fi\n'
936          txt += '    fi \n'
# Line 639 | Line 938 | class Cmssw(JobType):
938          txt += 'fi \n'
939          txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
940          txt += 'cd '+self.version+'\n'
941 +        ########## FEDE FOR DBS2 ######################
942 +        txt += 'SOFTWARE_DIR=`pwd`\n'
943 +        txt += 'echo SOFTWARE_DIR=$SOFTWARE_DIR \n'
944 +        ###############################################
945          ### needed grep for bug in scramv1 ###
946 +        txt += scram+' runtime -sh\n'
947          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
948 +        txt += 'echo $PATH\n'
949  
950          # Handle the arguments:
951          txt += "\n"
952          txt += "## number of arguments (first argument always jobnumber)\n"
953          txt += "\n"
954 <        txt += "narg=$#\n"
955 <        txt += "if [ $narg -lt 2 ]\n"
954 > #        txt += "narg=$#\n"
955 >        txt += "if [ $nargs -lt 2 ]\n"
956          txt += "then\n"
957 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$narg+ \n"
957 >        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
958          txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
959          txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
960          txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
961          txt += '    rm -f $RUNTIME_AREA/$repo \n'
962          txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
658        txt += '    echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
963          txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
964          ## OLI_Daniele
965          txt += '    if [ $middleware == OSG ]; then \n'
# Line 663 | Line 967 | class Cmssw(JobType):
967          txt += '        cd $RUNTIME_AREA\n'
968          txt += '        /bin/rm -rf $WORKING_DIR\n'
969          txt += '        if [ -d $WORKING_DIR ] ;then\n'
970 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
970 >        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
971 >        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
972 >        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
973 >        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
974          txt += '            rm -f $RUNTIME_AREA/$repo \n'
975          txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
672        txt += '            echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
976          txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
977          txt += '        fi\n'
978          txt += '    fi \n'
# Line 679 | Line 982 | class Cmssw(JobType):
982  
983          # Prepare job-specific part
984          job = common.job_list[nj]
985 <        pset = os.path.basename(job.configFilename())
986 <        txt += '\n'
987 <        if (self.datasetPath): # standard job
988 <            txt += 'InputFiles=$2\n'
989 <            txt += 'echo "Inputfiles:<$InputFiles>"\n'
990 <            txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
991 <        else:  # pythia like job
992 <            txt += 'Seed=$2\n'
993 <            txt += 'echo "Seed: <$Seed>"\n'
994 <            txt += 'sed "s#INPUT#$Seed#" $RUNTIME_AREA/'+pset+' > pset.cfg\n'
985 >        ### FEDE FOR DBS OUTPUT PUBLICATION
986 >        if (self.datasetPath):
987 >            txt += '\n'
988 >            txt += 'DatasetPath='+self.datasetPath+'\n'
989 >
990 >            datasetpath_split = self.datasetPath.split("/")
991 >            
992 >            txt += 'PrimaryDataset='+datasetpath_split[1]+'\n'
993 >            txt += 'DataTier='+datasetpath_split[2]+'\n'
994 >            #txt += 'ProcessedDataset='+datasetpath_split[3]+'\n'
995 >            txt += 'ApplicationFamily=cmsRun\n'
996 >
997 >        else:
998 >            txt += 'DatasetPath=MCDataTier\n'
999 >            txt += 'PrimaryDataset=null\n'
1000 >            txt += 'DataTier=null\n'
1001 >            #txt += 'ProcessedDataset=null\n'
1002 >            txt += 'ApplicationFamily=MCDataTier\n'
1003 >        if self.pset != None: #CarlosDaniele
1004 >            pset = os.path.basename(job.configFilename())
1005 >            txt += '\n'
1006 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
1007 >            if (self.datasetPath): # standard job
1008 >                #txt += 'InputFiles=$2\n'
1009 >                txt += 'InputFiles=${args[1]}\n'
1010 >                txt += 'MaxEvents=${args[2]}\n'
1011 >                txt += 'SkipEvents=${args[3]}\n'
1012 >                txt += 'echo "Inputfiles:<$InputFiles>"\n'
1013 >                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1014 >                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
1015 >                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1016 >                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
1017 >                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1018 >            else:  # pythia like job
1019 >                seedIndex=1
1020 >                if (self.firstRun):
1021 >                    txt += 'FirstRun=${args['+str(seedIndex)+']}\n'
1022 >                    txt += 'echo "FirstRun: <$FirstRun>"\n'
1023 >                    txt += 'sed "s#\<INPUTFIRSTRUN\>#$FirstRun#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1024 >                    seedIndex=seedIndex+1
1025 >
1026 >                if (self.sourceSeed):
1027 >                    txt += 'Seed=${args['+str(seedIndex)+']}\n'
1028 >                    txt += 'sed "s#\<INPUT\>#$Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1029 >                    seedIndex=seedIndex+1
1030 >                    ## the following seeds are not always present
1031 >                    if (self.sourceSeedVtx):
1032 >                        txt += 'VtxSeed=${args['+str(seedIndex)+']}\n'
1033 >                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
1034 >                        txt += 'sed "s#\<INPUTVTX\>#$VtxSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1035 >                        seedIndex += 1
1036 >                    if (self.sourceSeedG4):
1037 >                        txt += 'G4Seed=${args['+str(seedIndex)+']}\n'
1038 >                        txt += 'echo "G4Seed: <$G4Seed>"\n'
1039 >                        txt += 'sed "s#\<INPUTG4\>#$G4Seed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1040 >                        seedIndex += 1
1041 >                    if (self.sourceSeedMix):
1042 >                        txt += 'mixSeed=${args['+str(seedIndex)+']}\n'
1043 >                        txt += 'echo "MixSeed: <$mixSeed>"\n'
1044 >                        txt += 'sed "s#\<INPUTMIX\>#$mixSeed#" '+pset+' > tmp && mv -f tmp '+pset+'\n'
1045 >                        seedIndex += 1
1046 >                    pass
1047 >                pass
1048 >            txt += 'mv -f '+pset+' pset.cfg\n'
1049  
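The seedIndex bookkeeping above assigns each enabled seed the next free positional argument of the job wrapper. The sketch below shows how the ${args[N]} slots line up for an illustrative combination of flags:

    # Illustrative flags; in a real task they reflect the crab.cfg seed parameters.
    firstRun, sourceSeed, sourceSeedVtx, sourceSeedG4, sourceSeedMix = True, True, True, False, False

    seedIndex = 1
    slots = []
    if firstRun:
        slots.append(('FirstRun', seedIndex)); seedIndex += 1
    if sourceSeed:
        slots.append(('Seed', seedIndex)); seedIndex += 1
        if sourceSeedVtx:
            slots.append(('VtxSeed', seedIndex)); seedIndex += 1
        if sourceSeedG4:
            slots.append(('G4Seed', seedIndex)); seedIndex += 1
        if sourceSeedMix:
            slots.append(('mixSeed', seedIndex)); seedIndex += 1

    for name, idx in slots:
        print('%s is read from ${args[%d]}' % (name, idx))
    # -> FirstRun from ${args[1]}, Seed from ${args[2]}, VtxSeed from ${args[3]}
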
1050          if len(self.additional_inbox_files) > 0:
1051 <            for file in self.additional_inbox_files:
1052 <                txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
1053 <                txt += '   cp $RUNTIME_AREA/'+file+' .\n'
697 <                txt += '   chmod +x '+file+'\n'
698 <                txt += 'fi\n'
1051 >            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
1052 >            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
1053 >            txt += 'fi\n'
1054              pass
1055  
1056 <        txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1057 <
1058 <        txt += '\n'
1059 <        txt += 'echo "***** cat pset.cfg *********"\n'
1060 <        txt += 'cat pset.cfg\n'
1061 <        txt += 'echo "****** end pset.cfg ********"\n'
1062 <        txt += '\n'
1063 <        # txt += 'echo "***** cat pset1.cfg *********"\n'
1064 <        # txt += 'cat pset1.cfg\n'
1065 <        # txt += 'echo "****** end pset1.cfg ********"\n'
1056 >        if self.pset != None: #CarlosDaniele
1057 >            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
1058 >        
1059 >            txt += '\n'
1060 >            txt += 'echo "***** cat pset.cfg *********"\n'
1061 >            txt += 'cat pset.cfg\n'
1062 >            txt += 'echo "****** end pset.cfg ********"\n'
1063 >            txt += '\n'
1064 >            ### FEDE FOR DBS OUTPUT PUBLICATION
1065 >            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
1066 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
1067 >            ##############
1068 >            txt += '\n'
1069 >            # txt += 'echo "***** cat pset1.cfg *********"\n'
1070 >            # txt += 'cat pset1.cfg\n'
1071 >            # txt += 'echo "****** end pset1.cfg ********"\n'
1072          return txt
1073  
1074 <    def wsBuildExe(self, nj):
1074 >    def wsBuildExe(self, nj=0):
1075          """
1076          Put in the script the commands to build an executable
1077          or a library.
# Line 737 | Line 1098 | class Cmssw(JobType):
1098              txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1099              txt += '           rm -f $RUNTIME_AREA/$repo \n'
1100              txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
740            txt += '           echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
1101              txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1102              txt += '       fi\n'
1103              txt += '   fi \n'
# Line 746 | Line 1106 | class Cmssw(JobType):
1106              txt += 'else \n'
1107              txt += '   echo "Successful untar" \n'
1108              txt += 'fi \n'
1109 +            txt += '\n'
1110 +            txt += 'echo "Include ProdAgentApi and PRODCOMMON in PYTHONPATH"\n'
1111 +            txt += 'if [ -z "$PYTHONPATH" ]; then\n'
1112 +            #### FEDE FOR DBS OUTPUT PUBLICATION
1113 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon\n'
1114 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon\n'
1115 +            #txt += '   export PYTHONPATH=ProdAgentApi\n'
1116 +            txt += 'else\n'
1117 +            txt += '   export PYTHONPATH=$SOFTWARE_DIR/ProdAgentApi:$SOFTWARE_DIR/ProdCommon:${PYTHONPATH}\n'
1118 +            #txt += '   export PYTHONPATH=`pwd`/ProdAgentApi:`pwd`/ProdCommon:${PYTHONPATH}\n'
1119 +            #txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
1120 +            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
1121 +            ###################  
1122 +            txt += 'fi\n'
1123 +            txt += '\n'
1124 +
1125              pass
1126          
1127          return txt
# Line 757 | Line 1133 | class Cmssw(JobType):
1133          """
1134          
1135      def executableName(self):
1136 <        return self.executable
1136 >        if self.scriptExe: #CarlosDaniele
1137 >            return "sh "
1138 >        else:
1139 >            return self.executable
1140  
1141      def executableArgs(self):
1142 <        return " -p pset.cfg"
1142 >        if self.scriptExe:#CarlosDaniele
1143 >            return   self.scriptExe + " $NJob"
1144 >        else:
1145 >            # if >= CMSSW_1_5_X, add -e
1146 >            version_array = self.scram.getSWVersion().split('_')
1147 >            major = 0
1148 >            minor = 0
1149 >            try:
1150 >                major = int(version_array[1])
1151 >                minor = int(version_array[2])
1152 >            except:
1153 >                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"  
1154 >                raise CrabException(msg)
1155 >            if major >= 1 and minor >= 5 :
1156 >                return " -e -p pset.cfg"
1157 >            else:
1158 >                return " -p pset.cfg"
1159  
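executableArgs() decides on the -e flag by splitting the CMSSW version string on underscores; a minimal sketch of the parsing (the version string is illustrative):

    version = 'CMSSW_1_6_7'   # illustrative value as returned by scram
    parts = version.split('_')
    try:
        major, minor = int(parts[1]), int(parts[2])
    except (IndexError, ValueError):
        raise RuntimeError('Cannot parse CMSSW version string: ' + version)
    args = ' -e -p pset.cfg' if (major >= 1 and minor >= 5) else ' -p pset.cfg'
    print(args)   # -> ' -e -p pset.cfg'

Note that, as written, the test requires minor >= 5 regardless of the major number.
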
1160      def inputSandbox(self, nj):
1161          """
1162          Returns a list of filenames to be put in JDL input sandbox.
1163          """
1164          inp_box = []
1165 <        # dict added to delete duplicate from input sandbox file list
1166 <        seen = {}
1165 >        # # dict added to delete duplicate from input sandbox file list
1166 >        # seen = {}
1167          ## code
1168          if os.path.isfile(self.tgzNameWithPath):
1169              inp_box.append(self.tgzNameWithPath)
1170 +        if os.path.isfile(self.MLtgzfile):
1171 +            inp_box.append(self.MLtgzfile)
1172          ## config
1173 <        inp_box.append(common.job_list[nj].configFilename())
1173 >        if not self.pset is None:
1174 >            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1175          ## additional input files
1176 <        #for file in self.additional_inbox_files:
1177 <        #    inp_box.append(common.work_space.cwdDir()+file)
1176 >        tgz = self.additionalInputFileTgz()
1177 >        inp_box.append(tgz)
1178          return inp_box
1179  
1180      def outputSandbox(self, nj):
# Line 785 | Line 1183 | class Cmssw(JobType):
1183          """
1184          out_box = []
1185  
788        stdout=common.job_list[nj].stdout()
789        stderr=common.job_list[nj].stderr()
790
1186          ## User Declared output files
1187 <        for out in self.output_file:
1187 >        for out in (self.output_file+self.output_file_sandbox):
1188              n_out = nj + 1
1189              out_box.append(self.numberFile_(out,str(n_out)))
1190          return out_box
796        return []
1191  
1192      def prepareSteeringCards(self):
1193          """
# Line 809 | Line 1203 | class Cmssw(JobType):
1203          txt = '\n'
1204          txt += '# directory content\n'
1205          txt += 'ls \n'
1206 <        file_list = ''
1207 <        for fileWithSuffix in self.output_file:
1206 >
1207 >        txt += 'output_exit_status=0\n'
1208 >        
1209 >        for fileWithSuffix in (self.output_file_sandbox):
1210              output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
815            file_list=file_list+output_file_num+' '
1211              txt += '\n'
1212              txt += '# check output file\n'
1213 <            txt += 'ls '+fileWithSuffix+'\n'
1214 <            txt += 'exe_result=$?\n'
1215 <            txt += 'if [ $exe_result -ne 0 ] ; then\n'
1216 <            txt += '   echo "ERROR: No output file to manage"\n'
1217 <            txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
1218 <            txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
824 <            txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
825 <            txt += '   rm -f $RUNTIME_AREA/$repo \n'
826 <            txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
827 <            txt += '   echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
828 <            txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
829 <            ### OLI_DANIELE
1213 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1214 >            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1215 >            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1216 >            txt += 'else\n'
1217 >            txt += '    exit_status=60302\n'
1218 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1219              if common.scheduler.boss_scheduler_name == 'condor_g':
1220                  txt += '    if [ $middleware == OSG ]; then \n'
1221                  txt += '        echo "prepare dummy output file"\n'
1222                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1223                  txt += '    fi \n'
1224 +            txt += 'fi\n'
1225 +        
1226 +        for fileWithSuffix in (self.output_file):
1227 +            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
1228 +            txt += '\n'
1229 +            txt += '# check output file\n'
1230 +            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1231 +            txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA\n'
1232 +            txt += '    cp $RUNTIME_AREA/'+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1233              txt += 'else\n'
1234 <            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1234 >            txt += '    exit_status=60302\n'
1235 >            txt += '    echo "ERROR: Problem with output file '+fileWithSuffix+'"\n'
1236 >            txt += '    echo "JOB_EXIT_STATUS = $exit_status"\n'
1237 >            txt += '    output_exit_status=$exit_status\n'
1238 >            if common.scheduler.boss_scheduler_name == 'condor_g':
1239 >                txt += '    if [ $middleware == OSG ]; then \n'
1240 >                txt += '        echo "prepare dummy output file"\n'
1241 >                txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
1242 >                txt += '    fi \n'
1243              txt += 'fi\n'
1244 <      
1244 >        file_list = []
1245 >        for fileWithSuffix in (self.output_file):
1246 >             file_list.append(self.numberFile_(fileWithSuffix, '$NJob'))
1247 >            
1248 >        txt += 'file_list="'+string.join(file_list,' ')+'"\n'
1249          txt += 'cd $RUNTIME_AREA\n'
840        file_list=file_list[:-1]
841        txt += 'file_list="'+file_list+'"\n'
842        ### OLI_DANIELE
843        txt += 'if [ $middleware == OSG ]; then\n'  
844        txt += '    cd $RUNTIME_AREA\n'
845        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
846        txt += '    /bin/rm -rf $WORKING_DIR\n'
847        txt += '    if [ -d $WORKING_DIR ] ;then\n'
848        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
849        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
850        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
851        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
852        txt += '        rm -f $RUNTIME_AREA/$repo \n'
853        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
854        txt += '        echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
855        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
856        txt += '    fi\n'
857        txt += 'fi\n'
858        txt += '\n'
1250          return txt
1251  
1252      def numberFile_(self, file, txt):
# Line 866 | Line 1257 | class Cmssw(JobType):
1257          # take away last extension
1258          name = p[0]
1259          for x in p[1:-1]:
1260 <           name=name+"."+x
1260 >            name=name+"."+x
1261          # add "_txt"
1262          if len(p)>1:
1263 <          ext = p[len(p)-1]
1264 <          #result = name + '_' + str(txt) + "." + ext
874 <          result = name + '_' + txt + "." + ext
1263 >            ext = p[len(p)-1]
1264 >            result = name + '_' + txt + "." + ext
1265          else:
1266 <          #result = name + '_' + str(txt)
877 <          result = name + '_' + txt
1266 >            result = name + '_' + txt
1267          
1268          return result
1269  
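numberFile_() inserts the per-job tag just before the last extension, and appends it when there is no extension at all. A hypothetical standalone re-implementation, for illustration:

    def number_file(name, tag):
        # insert '_<tag>' before the last extension, as numberFile_ does
        parts = name.split('.')
        if len(parts) > 1:
            return '.'.join(parts[:-1]) + '_' + tag + '.' + parts[-1]
        return name + '_' + tag

    print(number_file('histo.root', '3'))        # -> histo_3.root
    print(number_file('out.tar.gz', '$NJob'))    # -> out.tar_$NJob.gz
    print(number_file('summary', '3'))           # -> summary_3
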
1270 <    def getRequirements(self):
1270 >    def getRequirements(self, nj=[]):
1271          """
1272          return job requirements to add to jdl files
1273          """
1274          req = ''
1275 <        if common.analisys_common_info['sw_version']:
1275 >        if self.version:
1276              req='Member("VO-cms-' + \
1277 <                 common.analisys_common_info['sw_version'] + \
1277 >                 self.version + \
1278 >                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1279 >        ## SL add requirement for OS version only if SL4
1280 >        #reSL4 = re.compile( r'slc4' )
1281 >        if self.executable_arch: # and reSL4.search(self.executable_arch):
1282 >            req+=' && Member("VO-cms-' + \
1283 >                 self.executable_arch + \
1284                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1285 <        if common.analisys_common_info['sites']:
1286 <            if len(common.analisys_common_info['sites'])>0:
1287 <                req = req + ' && ('
893 <                for i in range(len(common.analisys_common_info['sites'])):
894 <                    req = req + 'other.GlueCEInfoHostName == "' \
895 <                         + common.analisys_common_info['sites'][i] + '"'
896 <                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
897 <                        req = req + ' || '
898 <            req = req + ')'
899 <        #print "req = ", req
1285 >
1286 >        req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
1287 >
1288          return req
1289  
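With both a software version and an architecture available, getRequirements() produces a conjunction of GlueHostApplicationSoftwareRunTimeEnvironment membership tests plus the outbound-IP clause; a sketch building the same string for illustrative values:

    version = 'CMSSW_1_6_7'                # illustrative
    executable_arch = 'slc4_ia32_gcc345'   # illustrative

    req = ('Member("VO-cms-%s", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
           % version)
    req += (' && Member("VO-cms-%s", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
            % executable_arch)
    req += ' && (other.GlueHostNetworkAdapterOutboundIP)'
    print(req)
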
1290      def configFilename(self):
# Line 913 | Line 1301 | class Cmssw(JobType):
1301          txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
1302          txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
1303          txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
1304 +        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1305          txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
1306 <        txt += '   elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n'
1307 <        txt += '      # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n'
1308 <        txt += '       source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n'
1306 >        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
1307 >        txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
1308 >        txt += '       export SCRAM_ARCH='+self.executable_arch+'\n'
1309 >        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
1310          txt += '   else\n'
1311 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1311 >        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1312          txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1313          txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1314          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1315          txt += '       rm -f $RUNTIME_AREA/$repo \n'
1316          txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
927        txt += '       echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
1317          txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1318          txt += '       exit 1\n'
1319          txt += '\n'
# Line 932 | Line 1321 | class Cmssw(JobType):
1321          txt += '       cd $RUNTIME_AREA\n'
1322          txt += '       /bin/rm -rf $WORKING_DIR\n'
1323          txt += '       if [ -d $WORKING_DIR ] ;then\n'
1324 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n'
1325 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1326 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1328 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1329 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 <        txt += '            echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
942 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1324 >        txt += '           echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1325 >        txt += '           echo "JOB_EXIT_STATUS = 10017"\n'
1326 >        txt += '           echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1327 >        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1328 >        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1329 >        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1330 >        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1331          txt += '       fi\n'
1332          txt += '\n'
1333          txt += '       exit 1\n'
# Line 965 | Line 1353 | class Cmssw(JobType):
1353          txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1354          txt += '       rm -f $RUNTIME_AREA/$repo \n'
1355          txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
968        txt += '       echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
1356          txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1357          txt += '       exit 1\n'
1358          txt += '   else\n'
# Line 977 | Line 1364 | class Cmssw(JobType):
1364          txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1365          txt += '           rm -f $RUNTIME_AREA/$repo \n'
1366          txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
980        txt += '           echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
1367          txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1368          txt += '           exit 1\n'
1369          txt += '       fi\n'
# Line 991 | Line 1377 | class Cmssw(JobType):
1377          txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1378          txt += '           rm -f $RUNTIME_AREA/$repo \n'
1379          txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
994        txt += '           echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
1380          txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1381          txt += '           exit 1\n'
1382          txt += '       fi\n'
1383          txt += '   fi\n'
1384          txt += '   \n'
1000        txt += '   string=`cat /etc/redhat-release`\n'
1001        txt += '   echo $string\n'
1002        txt += '   if [[ $string = *alhalla* ]]; then\n'
1003        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1004        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1005        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1006        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1007        txt += '   else\n'
1008        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1009        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1010        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1011        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1012        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1013        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1014        txt += '       echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
1015        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1016        txt += '       exit 1\n'
1017        txt += '   fi\n'
1385          txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1386          txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1387          return txt
1388  
1389 +    ### FEDE FOR DBS OUTPUT PUBLICATION
1390 +    def modifyReport(self, nj):
1391 +        """
1392 +        insert the part of the script that modifies the FrameworkJob Report
1393 +        """
1394 +
1395 +        txt = ''
1396 +        try:
1397 +            publish_data = int(self.cfg_params['USER.publish_data'])          
1398 +        except KeyError:
1399 +            publish_data = 0
1400 +        if (publish_data == 1):  
1401 +            txt += 'echo "Modify Job Report" \n'
1402 +            #txt += 'chmod a+x $RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1403 +            ################ FEDE FOR DBS2 #############################################
1404 +            txt += 'chmod a+x $SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py\n'
1405 +            #############################################################################
1406 +            #try:
1407 +            #    publish_data = int(self.cfg_params['USER.publish_data'])          
1408 +            #except KeyError:
1409 +            #    publish_data = 0
1410 +
1411 +            txt += 'if [ -z "$SE" ]; then\n'
1412 +            txt += '    SE="" \n'
1413 +            txt += 'fi \n'
1414 +            txt += 'if [ -z "$SE_PATH" ]; then\n'
1415 +            txt += '    SE_PATH="" \n'
1416 +            txt += 'fi \n'
1417 +            txt += 'echo "SE = $SE"\n'
1418 +            txt += 'echo "SE_PATH = $SE_PATH"\n'
1419 +
1420 +        #if (publish_data == 1):  
1421 +            #processedDataset = self.cfg_params['USER.processed_datasetname']
1422 +            processedDataset = self.cfg_params['USER.publish_data_name']
1423 +            txt += 'ProcessedDataset='+processedDataset+'\n'
1424 +            #### LFN=/store/user/<user>/processedDataset_PSETHASH
1425 +            txt += 'if [ "$SE_PATH" == "" ]; then\n'
1426 +            #### FEDE: added slash in LFN ##############
1427 +            txt += '    FOR_LFN=/copy_problems/ \n'
1428 +            txt += 'else \n'
1429 +            txt += '    tmp=`echo $SE_PATH | awk -F \'store\' \'{print$2}\'` \n'
1430 +            #####  FEDE TO BE CHANGED, BECAUSE STORE IS HARDCODED!!!! ########
1431 +            txt += '    FOR_LFN=/store$tmp \n'
1432 +            txt += 'fi \n'
1433 +            txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1434 +            txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1435 +            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1436 +            #txt += 'echo "$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1437 +            txt += 'echo "$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH"\n'
1438 +            txt += '$SOFTWARE_DIR/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1439 +            #txt += '$RUNTIME_AREA/'+self.version+'/ProdAgentApi/FwkJobRep/ModifyJobReport.py crab_fjr_$NJob.xml $NJob $FOR_LFN $PrimaryDataset $DataTier $ProcessedDataset $ApplicationFamily $executable $CMSSW_VERSION $PSETHASH $SE $SE_PATH\n'
1440 +      
1441 +            txt += 'modifyReport_result=$?\n'
1442 +            txt += 'echo modifyReport_result = $modifyReport_result\n'
1443 +            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1444 +            txt += '    exit_status=1\n'
1445 +            txt += '    echo "ERROR: Problem with ModifyJobReport"\n'
1446 +            txt += 'else\n'
1447 +            txt += '    mv NewFrameworkJobReport.xml crab_fjr_$NJob.xml\n'
1448 +            txt += 'fi\n'
1449 +        else:
1450 +            txt += 'echo "no data publication required"\n'
1451 +            #txt += 'ProcessedDataset=no_data_to_publish \n'
1452 +            #### FEDE: added slash in LFN ##############
1453 +            #txt += 'FOR_LFN=/local/ \n'
1454 +            #txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1455 +            #txt += 'echo "FOR_LFN = $FOR_LFN" \n'
1456 +        return txt
1457 +
1458 +    def cleanEnv(self):
1459 +        ### OLI_DANIELE
1460 +        txt = ''
1461 +        txt += 'if [ $middleware == OSG ]; then\n'  
1462 +        txt += '    cd $RUNTIME_AREA\n'
1463 +        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
1464 +        txt += '    /bin/rm -rf $WORKING_DIR\n'
1465 +        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1466 +        txt += '              echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1467 +        txt += '              echo "JOB_EXIT_STATUS = 60999"\n'
1468 +        txt += '              echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1469 +        txt += '              dumpStatus $RUNTIME_AREA/$repo\n'
1470 +        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1471 +        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1472 +        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1473 +        txt += '    fi\n'
1474 +        txt += 'fi\n'
1475 +        txt += '\n'
1476 +        return txt
1477 +
1478      def setParam_(self, param, value):
1479          self._params[param] = value
1480  
# Line 1030 | Line 1486 | class Cmssw(JobType):
1486          
1487      def getTaskid(self):
1488          return self._taskId
1489 +
1490 +    def uniquelist(self, old):
1491 +        """
1492 +        remove duplicates from a list
1493 +        """
1494 +        nd={}
1495 +        for e in old:
1496 +            nd[e]=0
1497 +        return nd.keys()
1498 +
1499 +
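uniquelist() removes duplicates by using the entries as dictionary keys; in the Python 2 code above the order of the result is not guaranteed. For example:

    def uniquelist(old):
        # dict keys drop duplicates, as in Cmssw.uniquelist
        nd = {}
        for e in old:
            nd[e] = 0
        return list(nd.keys())

    print(uniquelist(['histo.root', 'ntuple.root', 'histo.root']))
    # -> ['histo.root', 'ntuple.root'] (order not significant)
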
1500 +    def checkOut(self, limit):
1501 +        """
1502 +        check the dimension of the output files
1503 +        check the total size of the output files
1504 +        txt = 'echo "*****************************************"\n'
1505 +        txt += 'echo "** Starting output sandbox limit check **"\n'
1506 +        txt += 'echo "*****************************************"\n'
1507 +        allOutFiles = ""
1508 +        listOutFiles = []
1509 +        for fileOut in (self.output_file+self.output_file_sandbox):
1510 +             if fileOut.find('crab_fjr') == -1:
1511 +                 allOutFiles = allOutFiles + " " + self.numberFile_(fileOut, '$NJob')
1512 +                 listOutFiles.append(self.numberFile_(fileOut, '$NJob'))
1513 +        txt += 'echo "OUTPUT files: '+str(allOutFiles)+'";\n'
1514 +        txt += 'ls -gGhrta;\n'
1515 +        txt += 'sum=0;\n'
1516 +        txt += 'for file in '+str(allOutFiles)+' ; do\n'
1517 +        txt += '    if [ -e $file ]; then\n'
1518 +        txt += '        tt=`ls -gGrta $file | awk \'{ print $3 }\'`\n'
1519 +        txt += '        sum=`expr $sum + $tt`\n'
1520 +        txt += '    else\n'
1521 +        txt += '        echo "WARNING: output file $file not found!"\n'
1522 +        txt += '    fi\n'
1523 +        txt += 'done\n'
1524 +        txt += 'echo "Total Output dimension: $sum";\n'
1525 +        txt += 'limit='+str(limit)+';\n'
1526 +        txt += 'echo "OUTPUT FILES LIMIT SET TO: $limit";\n'
1527 +        txt += 'if [ $limit -lt $sum ]; then\n'
1528 +        txt += '    echo "WARNING: output files are too big - something will be lost;"\n'
1529 +        txt += '    echo "         checking the output file sizes..."\n'
1530 +        """
1531 +        txt += '    dim=0;\n'
1532 +        txt += '    exclude=0;\n'
1533 +        txt += '    for files in '+str(allOutFiles)+' ; do\n'
1534 +        txt += '        sumTemp=0;\n'
1535 +        txt += '        for file2 in '+str(allOutFiles)+' ; do\n'
1536 +        txt += '            if [ $file != $file2 ]; then\n'
1537 +        txt += '                tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1538 +        txt += '                sumTemp=`expr $sumTemp + $tt`;\n'
1539 +        txt += '            fi\n'
1540 +        txt += '        done\n'
1541 +        txt += '        if [ $sumTemp -lt $limit ]; then\n'
1542 +        txt += '            if [ $dim -lt $sumTemp ]; then\n'
1543 +        txt += '                dim=$sumTemp;\n'
1544 +        txt += '                exclude=$file;\n'
1545 +        txt += '            fi\n'
1546 +        txt += '        fi\n'
1547 +        txt += '    done\n'
1548 +        txt += '    echo "Dimension calculated: $dim"; echo "File to exclude: $exclude";\n'
1549 +        """
1550 +        txt += '    tot=0;\n'
1551 +        txt += '    for file2 in '+str(allOutFiles)+' ; do\n'
1552 +        txt += '        tt=`ls -gGrta $file2 | awk \'{ print $3 }\';`\n'
1553 +        txt += '        tot=`expr $tot + $tt`;\n'
1554 +        txt += '        if [ $limit -lt $tot ]; then\n'
1555 +        txt += '            tot=`expr $tot - $tt`;\n'
1556 +        txt += '            fileLast=$file2;\n'
1557 +        txt += '            break;\n'
1558 +        txt += '        fi\n'
1559 +        txt += '    done\n'
1560 +        txt += '    echo "Dimension calculated: $tot"; echo "First file to exclude: $fileLast";\n'
1561 +        txt += '    flag=0;\n'    
1562 +        txt += '    for filess in '+str(allOutFiles)+' ; do\n'
1563 +        txt += '        if [ $fileLast = $filess ]; then\n'
1564 +        txt += '            flag=1;\n'
1565 +        txt += '        fi\n'
1566 +        txt += '        if [ $flag -eq 1 ]; then\n'
1567 +        txt += '            rm -f $filess;\n'
1568 +        txt += '        fi\n'
1569 +        txt += '    done\n'
1570 +        txt += '    ls -agGhrt;\n'
1571 +        txt += '    echo "WARNING: output files are too big: they cannot be put in the output_sandbox.";\n'
1572 +        txt += '    echo "JOB_EXIT_STATUS = 70000";\n'
1573 +        txt += '    exit_status=70000;\n'
1574 +        txt += 'else\n'
1575 +        txt += '    echo "Total Output dimension $sum is fine.";\n'
1576 +        txt += 'fi\n'
1577 +        txt += 'echo "*****************************************"\n'
1578 +        txt += 'echo "*** Ending output sandbox limit check ***"\n'
1579 +        txt += 'echo "*****************************************"\n'
1580 +        return txt
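checkOut() emits shell code that adds up the output file sizes, finds the first file at which the running total would exceed the limit, and deletes that file and every one after it so the remaining sandbox still fits. A Python sketch of the same pruning logic (file list and limit are illustrative):

    import os

    limit = 55 * 1000 * 1000                                     # illustrative limit in bytes
    out_files = ['histo_1.root', 'ntuple_1.root', 'log_1.txt']   # illustrative

    total, first_dropped = 0, None
    for f in out_files:
        if not os.path.exists(f):
            print('WARNING: output file %s not found!' % f)
            continue
        size = os.path.getsize(f)
        if total + size > limit:
            first_dropped = f
            break
        total += size

    if first_dropped is not None:
        # drop the offending file and everything after it, as the generated script does
        dropped = out_files[out_files.index(first_dropped):]
        for f in dropped:
            if os.path.exists(f):
                os.remove(f)
        print('kept %d bytes, dropped: %s' % (total, ' '.join(dropped)))
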

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines