[ViewVC] Diff of: cvsroot/COMP/CRAB/python/DataDiscovery.py

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.19 by slacapra, Fri Jan 4 17:30:56 2008 UTC vs.
Revision 1.23 by spiga, Mon Jun 9 17:50:45 2008 UTC

#	Line 4 \| Line 4 \| import DBSAPI.dbsApi
4		from DBSAPI.dbsApiException import *
5		import common
6		from crab_util import *
7	+	import os
8
9
10		# #######################################
#	Line 70 \| Line 71 \| class NoDataTierinProvenanceError(except
71		# ####################################
72		# class to find and extract info from published data
73		class DataDiscovery:
74	<	def __init__(self, datasetPath, cfg_params):
74	>	def __init__(self, datasetPath, cfg_params, skipAnBlocks):
75
76		# Attributes
77		self.datasetPath = datasetPath
78		self.cfg_params = cfg_params
79	+	self.skipBlocks = skipAnBlocks
80
81		self.eventsPerBlock = {} # DBS output: map fileblocks-events for collection
82		self.eventsPerFile = {} # DBS output: map files-events
83		self.blocksinfo = {} # DBS output: map fileblocks-files
84		self.maxEvents = 0 # DBS output: max events
85	+	self.parent = {} # DBS output: map files-parents
86
87		# ####################################
88		def fetchDBSInfo(self):
#	Line 99 \| Line 102 \| class DataDiscovery:
102		if (self.cfg_params.has_key('CMSSW.runselection')):
103		runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
104
105	+	common.logger.debug(6,"runselection is: %s"%runselection)
106		## service API
107		args = {}
108		args['url'] = dbs_url
109		args['level'] = 'CRITICAL'
110
111	+	## check if has been requested to use the parent info
112	+	useParent = self.cfg_params.get('CMSSW.use_parent',False)
113	+
114	+	## check if has been asked for a non default file to store/read analyzed fileBlocks
115	+	defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'
116	+	fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))
117	+
118		api = DBSAPI.dbsApi.DbsApi(args)
119		try:
120		if len(runselection) <= 0 :
121	<	files = api.listDatasetFiles(self.datasetPath)
121	>	if useParent:
122	>	allowedRetriveValue = ['retrive_parent',
123	>	'retrive_block',
124	>	'retrive_lumi',
125	>	'retrive_run'
126	>	]
127	>	files = api.listFiles(path=self.datasetPath, retriveList=allowedRetriveValue)
128	>	common.logger.debug(5,"Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
129	>	common.logger.write("Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
130	>	else:
131	>	files = api.listDatasetFiles(self.datasetPath)
132		else :
133	<	files = api.listFiles(path=self.datasetPath, details=True)
133	>	files=[]
134	>	for arun in runselection:
135	>	try:
136	>	filesinrun = api.listFiles(path=self.datasetPath,retriveList=allowedRetriveValue,runNumber=arun)
137	>	files.extend(filesinrun)
138	>	except:
139	>	msg="WARNING: problem extracting info from DBS for run %s "%arun
140	>	common.logger.message(msg)
141	>	pass
142	>
143		except DbsBadRequest, msg:
144		raise DataDiscoveryError(msg)
145		except DBSError, msg:
146		raise DataDiscoveryError(msg)
147
148	+	anFileBlocks = []
149	+	if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)
150	+
151		# parse files and fill arrays
152		for file in files :
153	<	filename = file['LogicalFileName']
154	<	if filename.find('.dat') < 0 :
155	<	fileblock = file['Block']['Name']
156	<	events = file['NumberOfEvents']
157	<	continue_flag = 0
158	<	if len(runselection) > 0 :
159	<	runslist = file['RunsList']
160	<	for run in runslist :
161	<	runnumber = run['RunNumber']
162	<	for selected_run in runselection :
130	<	if runnumber == selected_run :
131	<	continue_flag = 1
132	<	else :
133	<	continue_flag = 1
134	<
135	<	if continue_flag == 1 :
153	>	parList = []
154	>	# skip already analyzed blocks
155	>	fileblock = file['Block']['Name']
156	>	if fileblock not in anFileBlocks :
157	>	filename = file['LogicalFileName']
158	>	# if requested, retrieve the list of parents for the given child
159	>	if useParent: parList = [x['LogicalFileName'] for x in file['ParentList']]
160	>	self.parent[filename] = parList
161	>	if filename.find('.dat') < 0 :
162	>	events = file['NumberOfEvents']
163		# number of events per block
164		if fileblock in self.eventsPerBlock.keys() :
165		self.eventsPerBlock[fileblock] += events
166		else :
167		self.eventsPerBlock[fileblock] = events
141	–
168		# number of events per file
169		self.eventsPerFile[filename] = events
170	<
170	>
171		# list of files per block
172		if fileblock in self.blocksinfo.keys() :
173		self.blocksinfo[fileblock].append(filename)
174		else :
175		self.blocksinfo[fileblock] = [filename]
176	<
176	>
177		# total number of events
178		self.maxEvents += events
179	+	if self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
180	+	msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
181	+	raise CrabException(msg)
182
183	+	saveFblocks=''
184		for block in self.eventsPerBlock.keys() :
185	+	saveFblocks += str(block)+'\n'
186		common.logger.debug(6,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
187	<
187	>	writeTXTfile(self, fileBlocks_FileName , saveFblocks)
188	>
189		if len(self.eventsPerBlock) <= 0:
190		raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
191		+ " dataset path variables in crab.cfg")
#	Line 188 \| Line 220 \| class DataDiscovery:
220		"""
221		return self.blocksinfo
222
223	+	# #################################################
224	+	def getParent(self):
225	+	"""
226	+	return parent grouped by file
227	+	"""
228	+	return self.parent
229	+
230		########################################################################

Diff Legend

–	Removed lines
+	Added lines
<	Changed lines (old revision)
>	Changed lines (new revision)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents): Revision 1.19 by slacapra, Fri Jan 4 17:30:56 2008 UTC vs. Revision 1.23 by spiga, Mon Jun 9 17:50:45 2008 UTC

Diff Legend

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.19 by slacapra, Fri Jan 4 17:30:56 2008 UTC vs.
Revision 1.23 by spiga, Mon Jun 9 17:50:45 2008 UTC