[ViewVC] Diff of: cvsroot/COMP/CRAB/python/Publisher.py

Comparing COMP/CRAB/python/Publisher.py (file contents):
Revision 1.7 by fanzago, Tue Dec 18 10:58:20 2007 UTC vs.
Revision 1.36 by fanzago, Mon Jun 22 16:22:32 2009 UTC

#	Line 3 \| Line 3 \| import common
3		import time, glob
4		from Actor import *
5		from crab_util import *
6	–	from crab_logger import Logger
6		from crab_exceptions import *
7	<	from FwkJobRep.ReportParser import readJobReport
7	>	from ProdCommon.FwkJobRep.ReportParser import readJobReport
8	>	from ProdCommon.FwkJobRep.ReportState import checkSuccess
9		from ProdCommon.MCPayloads.WorkflowSpec import WorkflowSpec
10		from ProdCommon.DataMgmt.DBS.DBSWriter import DBSWriter
11		from ProdCommon.DataMgmt.DBS.DBSErrors import DBSWriterError, formatEx,DBSReaderError
12		from ProdCommon.DataMgmt.DBS.DBSReader import DBSReader
13		from ProdCommon.DataMgmt.DBS.DBSWriter import DBSWriter,DBSWriterObjects
14		import sys
15	+	from DBSAPI.dbsApiException import DbsException
16	+	from DBSAPI.dbsApi import DbsApi
17
18		class Publisher(Actor):
19		def __init__(self, cfg_params):
#	Line 23 \| Line 25 \| class Publisher(Actor):
25		- publishes output data on DBS and DLS
26		"""
27
28	<	try:
29	<	self.processedData = cfg_params['USER.publish_data_name']
30	<	except KeyError:
28	>	self.cfg_params=cfg_params
29	>
30	>	if not cfg_params.has_key('USER.publish_data_name'):
31		raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
32	<	try:
33	<	if (int(cfg_params['USER.copy_data']) != 1): raise KeyError
34	<	except KeyError:
35	<	raise CrabException('You can not publish data because you did not selected * copy_data = 1 * in the crab.cfg file')
36	<	try:
37	<	self.pset = cfg_params['CMSSW.pset']
38	<	except KeyError:
32	>	self.userprocessedData = cfg_params['USER.publish_data_name']
33	>	self.processedData = None
34	>
35	>	if (not cfg_params.has_key('USER.copy_data') or int(cfg_params['USER.copy_data']) != 1 ) or \
36	>	(not cfg_params.has_key('USER.publish_data') or int(cfg_params['USER.publish_data']) != 1 ):
37	>	msg = 'You can not publish data because you did not selected \n'
38	>	msg += '\t* copy_data = 1 and publish_data = 1 * in the crab.cfg file'
39	>	raise CrabException(msg)
40	>
41	>	if not cfg_params.has_key('CMSSW.pset'):
42		raise CrabException('Cannot publish output data, because you did not specify the psetname in [CMSSW] of your crab.cfg file')
43	<	try:
44	<	self.globalDBS=cfg_params['CMSSW.dbs_url']
45	<	except KeyError:
46	<	self.globalDBS="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
47	<	try:
48	<	self.DBSURL=cfg_params['USER.dbs_url_for_publication']
49	<	common.logger.message('<dbs_url_for_publication> = '+self.DBSURL)
50	<	if (self.DBSURL == "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet") or (self.DBSURL == "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global_writer/servlet/DBSServlet"):
51	<	msg = "You can not publish your data in the globalDBS = " + self.DBSURL + "\n"
52	<	msg = msg + "Please write your local one in the [USER] section 'dbs_url_for_publication'"
53	<	raise CrabException(msg)
54	<	except KeyError:
55	<	msg = "Error. The [USER] section does not have 'dbs_url_for_publication'"
56	<	msg = msg + " entry, necessary to publish the data"
43	>	self.pset = cfg_params['CMSSW.pset']
44	>
45	>	self.globalDBS=cfg_params.get('CMSSW.dbs_url',"http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet")
46	>
47	>	if not cfg_params.has_key('USER.dbs_url_for_publication'):
48	>	msg = "Warning. The [USER] section does not have 'dbs_url_for_publication'"
49	>	msg = msg + " entry, necessary to publish the data.\n"
50	>	msg = msg + "Use the command crab -publish -USER.dbs_url_for_publication=dbs_url_for_publication* \nwhere dbs_url_for_publication is your local dbs instance."
51	>	raise CrabException(msg)
52	>
53	>	self.DBSURL=cfg_params['USER.dbs_url_for_publication']
54	>	common.logger.info('<dbs_url_for_publication> = '+self.DBSURL)
55	>	if (self.DBSURL == "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet") or (self.DBSURL == "https://cmsdbsprod.cern.ch:8443/cms_dbs_prod_global_writer/servlet/DBSServlet"):
56	>	msg = "You can not publish your data in the globalDBS = " + self.DBSURL + "\n"
57	>	msg = msg + "Please write your local one in the [USER] section 'dbs_url_for_publication'"
58		raise CrabException(msg)
59
60		self.content=file(self.pset).read()
61		self.resDir = common.work_space.resDir()
62	+
63	+	self.dataset_to_import=[]
64	+
65		self.datasetpath=cfg_params['CMSSW.datasetpath']
66	+	if (self.datasetpath.upper() != 'NONE'):
67	+	self.dataset_to_import.append(self.datasetpath)
68	+
69	+	### Added PU dataset
70	+	tmp = cfg_params.get('CMSSW.dataset_pu',None)
71	+	if tmp :
72	+	datasets = tmp.split(',')
73	+	for dataset in datasets:
74	+	dataset=string.strip(dataset)
75	+	self.dataset_to_import.append(dataset)
76	+	###
77	+
78	+	self.skipOcheck=cfg_params.get('CMSSW.publish_zero_event',0)
79	+
80		self.SEName=''
81		self.CMSSW_VERSION=''
82		self.exit_status=''
#	Line 64 \| Line 87 \| class Publisher(Actor):
87
88		def importParentDataset(self,globalDBS, datasetpath):
89		"""
90	<	"""
90	>	"""
91		dbsWriter = DBSWriter(self.DBSURL,level='ERROR')
92
93		try:
94	<	dbsWriter.importDataset(globalDBS, self.datasetpath, self.DBSURL)
94	>	#dbsWriter.importDatasetWithoutParentage(globalDBS, datasetpath, self.DBSURL)
95	>	dbsWriter.importDataset(globalDBS, datasetpath, self.DBSURL)
96		except DBSWriterError, ex:
97		msg = "Error importing dataset to be processed into local DBS\n"
98		msg += "Source Dataset: %s\n" % datasetpath
99		msg += "Source DBS: %s\n" % globalDBS
100		msg += "Destination DBS: %s\n" % self.DBSURL
101	<	common.logger.message(msg)
101	>	common.logger.info(msg)
102	>	common.logger.debug(str(ex))
103	>	return 1
104	>	return 0
105	>	"""
106	>	print " patch for importParentDataset: datasetpath = ", datasetpath
107	>	try:
108	>	args={}
109	>	args['url']=self.DBSURL
110	>	args['mode']='POST'
111	>	block = ""
112	>	api = DbsApi(args)
113	>	#api.migrateDatasetContents(srcURL, dstURL, path, block , False)
114	>	api.migrateDatasetContents(globalDBS, self.DBSURL, datasetpath, block , False)
115	>
116	>	except DbsException, ex:
117	>	print "Caught API Exception %s: %s " % (ex.getClassName(), ex.getErrorMessage() )
118	>	if ex.getErrorCode() not in (None, ""):
119	>	print "DBS Exception Error Code: ", ex.getErrorCode()
120		return 1
121	+	print "Done"
122		return 0
123	<
123	>	"""
124		def publishDataset(self,file):
125		"""
126		"""
#	Line 87 \| Line 130 \| class Publisher(Actor):
130		except IndexError:
131		self.exit_status = '1'
132		msg = "Error: Problem with "+file+" file"
133	<	common.logger.message(msg)
133	>	common.logger.info(msg)
134		return self.exit_status
135	<
136	<	if (self.datasetpath != 'None'):
137	<	common.logger.message("--->>> Importing parent dataset in the dbs")
138	<	status_import=self.importParentDataset(self.globalDBS, self.datasetpath)
139	<	if (status_import == 1):
140	<	common.logger.message('Problem with parent import from the global DBS '+self.globalDBS+ 'to the local one '+self.DBSURL)
141	<	self.exit_status='1'
142	<	return self.exit_status
143	<	common.logger.message("Parent import ok")
135	>
136	>	if (len(self.dataset_to_import) != 0):
137	>	for dataset in self.dataset_to_import:
138	>	common.logger.info("--->>> Importing parent dataset in the dbs: " +dataset)
139	>	status_import=self.importParentDataset(self.globalDBS, dataset)
140	>	if (status_import == 1):
141	>	common.logger.info('Problem with parent '+ dataset +' import from the global DBS '+self.globalDBS+ 'to the local one '+self.DBSURL)
142	>	self.exit_status='1'
143	>	return self.exit_status
144	>	else:
145	>	common.logger.info('Import ok of dataset '+dataset)
146
147		#// DBS to contact
148		dbswriter = DBSWriter(self.DBSURL)
#	Line 107 \| Line 152 \| class Publisher(Actor):
152		except IndexError:
153		self.exit_status = '1'
154		msg = "Error: No file to publish in xml file"+file+" file"
155	<	common.logger.message(msg)
155	>	common.logger.info(msg)
156		return self.exit_status
157
158		datasets=fileinfo.dataset
159	<	common.logger.debug(6,"FileInfo = " + str(fileinfo))
160	<	common.logger.debug(6,"DatasetInfo = " + str(datasets))
159	>	common.logger.log(10-1,"FileInfo = " + str(fileinfo))
160	>	common.logger.log(10-1,"DatasetInfo = " + str(datasets))
161	>	if len(datasets)<=0:
162	>	self.exit_status = '1'
163	>	msg = "Error: No info about dataset in the xml file "+file
164	>	common.logger.info(msg)
165	>	return self.exit_status
166		for dataset in datasets:
167		#### for production data
168	+	self.processedData = dataset['ProcessedDataset']
169		if (dataset['PrimaryDataset'] == 'null'):
170	<	dataset['PrimaryDataset'] = dataset['ProcessedDataset']
171	<
170	>	#dataset['PrimaryDataset'] = dataset['ProcessedDataset']
171	>	dataset['PrimaryDataset'] = self.userprocessedData
172	>	#else: # add parentage from input dataset
173	>	elif self.datasetpath.upper() != 'NONE':
174	>	dataset['ParentDataset']= self.datasetpath
175	>
176		dataset['PSetContent']=self.content
177		cfgMeta = {'name' : self.pset , 'Type' : 'user' , 'annotation': 'user cfg', 'version' : 'private version'} # add real name of user cfg
178	<	common.logger.message("PrimaryDataset = %s"%dataset['PrimaryDataset'])
179	<	common.logger.message("ProcessedDataset = %s"%dataset['ProcessedDataset'])
180	<	common.logger.message("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER")
178	>	common.logger.info("PrimaryDataset = %s"%dataset['PrimaryDataset'])
179	>	common.logger.info("ProcessedDataset = %s"%dataset['ProcessedDataset'])
180	>	common.logger.info("<User Dataset Name> = /"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER")
181	>	self.dataset_to_check="/"+dataset['PrimaryDataset']+"/"+dataset['ProcessedDataset']+"/USER"
182
183	<	common.logger.debug(6,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset']))
183	>	common.logger.log(10-1,"--->>> Inserting primary: %s processed : %s"%(dataset['PrimaryDataset'],dataset['ProcessedDataset']))
184
185		primary = DBSWriterObjects.createPrimaryDataset( dataset, dbswriter.dbs)
186	<	common.logger.debug(6,"Primary: %s "%primary)
186	>	common.logger.log(10-1,"Primary: %s "%primary)
187
188		algo = DBSWriterObjects.createAlgorithm(dataset, cfgMeta, dbswriter.dbs)
189	<	common.logger.debug(6,"Algo: %s "%algo)
189	>	common.logger.log(10-1,"Algo: %s "%algo)
190
191		processed = DBSWriterObjects.createProcessedDataset(primary, algo, dataset, dbswriter.dbs)
192	<	common.logger.debug(6,"Processed: %s "%processed)
192	>	common.logger.log(10-1,"Processed: %s "%processed)
193
194	<	common.logger.debug(6,"Inserted primary %s processed %s"%(primary,processed))
194	>	common.logger.log(10-1,"Inserted primary %s processed %s"%(primary,processed))
195
196	<	common.logger.debug(6,"exit_status = %s "%self.exit_status)
196	>	common.logger.log(10-1,"exit_status = %s "%self.exit_status)
197		return self.exit_status
198
199		def publishAJobReport(self,file,procdataset):
#	Line 162 \| Line 218 \| class Publisher(Actor):
218		elif (file['LFN'] == ''):
219		self.noLFN.append(file['PFN'])
220		else:
221	<	if int(file['TotalEvents']) != 0 :
222	<	file.lumisections = {}
221	>	if self.skipOcheck==0:
222	>	if int(file['TotalEvents']) != 0:
223	>	#file.lumisections = {}
224	>	# lumi info are now in run hash
225	>	file.runs = {}
226	>	for ds in file.dataset:
227	>	### Fede for production
228	>	if (ds['PrimaryDataset'] == 'null'):
229	>	#ds['PrimaryDataset']=procdataset
230	>	ds['PrimaryDataset']=self.userprocessedData
231	>	filestopublish.append(file)
232	>	else:
233	>	self.noEventsFiles.append(file['LFN'])
234	>	else:
235	>	file.runs = {}
236		for ds in file.dataset:
168	–	ds['ProcessedDataset']=procdataset
237		### Fede for production
238		if (ds['PrimaryDataset'] == 'null'):
239	<	ds['PrimaryDataset']=procdataset
239	>	#ds['PrimaryDataset']=procdataset
240	>	ds['PrimaryDataset']=self.userprocessedData
241		filestopublish.append(file)
242	<	else:
174	<	self.noEventsFiles.append(file['LFN'])
242	>
243		jobReport.files = filestopublish
244		### if all files of FJR have number of events = 0
245		if (len(filestopublish) == 0):
#	Line 183 \| Line 251 \| class Publisher(Actor):
251		Blocks=None
252		try:
253		Blocks=dbswriter.insertFiles(jobReport)
254	<	common.logger.message("Blocks = %s"%Blocks)
254	>	common.logger.info("Inserting file in blocks = %s"%Blocks)
255		except DBSWriterError, ex:
256	<	common.logger.message("Insert file error: %s"%ex)
256	>	common.logger.info("Insert file error: %s"%ex)
257		return Blocks
258
259		def run(self):
#	Line 194 \| Line 262 \| class Publisher(Actor):
262		"""
263
264		file_list = glob.glob(self.resDir+"crab_fjr*.xml")
265	<	common.logger.debug(6, "file_list = "+str(file_list))
266	<	common.logger.debug(6, "len(file_list) = "+str(len(file_list)))
265	>	## Select only those fjr that are succesfull
266	>	good_list=[]
267	>	for fjr in file_list:
268	>	reports = readJobReport(fjr)
269	>	if len(reports)>0:
270	>	if reports[0].status == "Success":
271	>	good_list.append(fjr)
272	>	file_list=good_list
273	>	##
274	>	common.logger.log(10-1, "file_list = "+str(file_list))
275	>	common.logger.log(10-1, "len(file_list) = "+str(len(file_list)))
276
277		if (len(file_list)>0):
278		BlocksList=[]
279	<	common.logger.message("--->>> Start dataset publication")
279	>	common.logger.info("--->>> Start dataset publication")
280		self.exit_status=self.publishDataset(file_list[0])
281		if (self.exit_status == '1'):
282		return self.exit_status
283	<	common.logger.message("--->>> End dataset publication")
283	>	common.logger.info("--->>> End dataset publication")
284
285
286	<	common.logger.message("--->>> Start files publication")
286	>	common.logger.info("--->>> Start files publication")
287		for file in file_list:
288	<	common.logger.message("file = "+file)
288	>	common.logger.debug( "file = "+file)
289		Blocks=self.publishAJobReport(file,self.processedData)
290		if Blocks:
291	<	[BlocksList.append(x) for x in Blocks]
291	>	for x in Blocks: # do not allow multiple entries of the same block
292	>	if x not in BlocksList:
293	>	BlocksList.append(x)
294
295		# close the blocks
296	<	common.logger.debug(6, "BlocksList = %s"%BlocksList)
296	>	common.logger.log(10-1, "BlocksList = %s"%BlocksList)
297		# dbswriter = DBSWriter(self.DBSURL,level='ERROR')
298		dbswriter = DBSWriter(self.DBSURL)
299
300		for BlockName in BlocksList:
301		try:
302		closeBlock=dbswriter.manageFileBlock(BlockName,maxFiles= 1)
303	<	common.logger.debug(6, "closeBlock %s"%closeBlock)
303	>	common.logger.log(10-1, "closeBlock %s"%closeBlock)
304		#dbswriter.dbs.closeBlock(BlockName)
305		except DBSWriterError, ex:
306	<	common.logger.message("Close block error %s"%ex)
306	>	common.logger.info("Close block error %s"%ex)
307
308		if (len(self.noEventsFiles)>0):
309	<	common.logger.message("--->>> WARNING: "+str(len(self.noEventsFiles))+" files not published because they contain 0 events are:")
309	>	common.logger.info("--->>> WARNING: "+str(len(self.noEventsFiles))+" files not published because they contain 0 events are:")
310		for lfn in self.noEventsFiles:
311	<	common.logger.message("------ LFN: %s"%lfn)
311	>	common.logger.info("------ LFN: %s"%lfn)
312		if (len(self.noLFN)>0):
313	<	common.logger.message("--->>> WARNING: there are "+str(len(self.noLFN))+" files not published because they have empty LFN")
313	>	common.logger.info("--->>> WARNING: there are "+str(len(self.noLFN))+" files not published because they have empty LFN")
314		for pfn in self.noLFN:
315	<	common.logger.message("------ pfn: %s"%pfn)
315	>	common.logger.info("------ pfn: %s"%pfn)
316		if (len(self.problemFiles)>0):
317	<	common.logger.message("--->>> WARNING: "+str(len(self.problemFiles))+" files not published because they had problem with copy to SE")
317	>	common.logger.info("--->>> WARNING: "+str(len(self.problemFiles))+" files not published because they had problem with copy to SE")
318		for lfn in self.problemFiles:
319	<	common.logger.message("------ LFN: %s"%lfn)
320	<	common.logger.message("--->>> End files publication")
321	<	common.logger.message("--->>> To check data publication please use: InspectDBS2.py --DBSURL=<dbs_url_for_publication> --datasetPath=<User Dataset Name>")
319	>	common.logger.info("------ LFN: %s"%lfn)
320	>	common.logger.info("--->>> End files publication")
321	>
322	>	self.cfg_params['USER.dataset_to_check']=self.dataset_to_check
323	>	from InspectDBS import InspectDBS
324	>	check=InspectDBS(self.cfg_params)
325	>	check.checkPublication()
326		return self.exit_status
327
328		else:
329	<	common.logger.message("--->>> "+self.resDir+" empty: no file to publish on DBS")
329	>	common.logger.info("--->>> "+self.resDir+" empty: no file to publish on DBS")
330		self.exit_status = '1'
331		return self.exit_status
332

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines (old text, revision 1.7)
>	Changed lines (new text, revision 1.36)

Comparing COMP/CRAB/python/Publisher.py (file contents): Revision 1.7 by fanzago, Tue Dec 18 10:58:20 2007 UTC vs. Revision 1.36 by fanzago, Mon Jun 22 16:22:32 2009 UTC

Diff Legend

Comparing COMP/CRAB/python/Publisher.py (file contents):
Revision 1.7 by fanzago, Tue Dec 18 10:58:20 2007 UTC vs.
Revision 1.36 by fanzago, Mon Jun 22 16:22:32 2009 UTC