
Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.37.2.1 by ewv, Thu Jan 21 16:13:28 2010 UTC vs.
Revision 1.50 by belforte, Thu Sep 12 13:45:22 2013 UTC

# Line 8 | Line 8 | import DBSAPI.dbsApi
8   from DBSAPI.dbsApiException import *
9   import common
10   from crab_util import *
11 + try: # Can remove when CMSSW 3.7 and earlier are dropped
12 +    from FWCore.PythonUtilities.LumiList import LumiList
13 + except ImportError:
14 +    from LumiList import LumiList
15 +
16   import os
17  
18  
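The fallback import added above keeps LumiList usable on releases older than CMSSW 3.8, where FWCore.PythonUtilities does not yet ship the class. Below is a minimal sketch of the constructors and the filterLumis() call the rest of this patch relies on; runs= and filename= are used directly by the patch, while lumis= and the file name 'goodruns.json' are assumptions for illustration only:

    try:
        from FWCore.PythonUtilities.LumiList import LumiList   # CMSSW 3.8 and later
    except ImportError:
        from LumiList import LumiList                           # local copy on older releases

    runMask  = LumiList(runs=[163270, 163286])              # mask covering whole runs
    lumiMask = LumiList(lumis=[(163270, 2), (163286, 7)])   # mask of explicit (run, lumi) pairs
    # fetchDBSInfo() instead builds its mask from the JSON file named by
    # CMSSW.lumi_mask:  LumiList(filename=self.lumiMask)

    # filterLumis() keeps only the (run, lumi) pairs covered by the mask
    print runMask.filterLumis([(163270, 1), (163286, 7), (999999, 5)])
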
# Line 88 | Line 93 | class DataDiscovery:
93          #       Attributes
94          self.datasetPath = datasetPath
95          # Analysis dataset is primary/processed/tier/definition
96 <        self.ads = len(self.datasetPath.split("/")) > 4 or len(self.datasetPath.split("/")) == 1
96 >        self.ads = len(self.datasetPath.split("/")) > 4
97          self.cfg_params = cfg_params
98          self.skipBlocks = skipAnBlocks
99  
# Line 101 | Line 106 | class DataDiscovery:
106          self.maxLumis = 0         # DBS output: total number of lumis
107          self.parent = {}          # DBS output: parents of each file
108          self.lumis = {}           # DBS output: lumis in each file
109 <
109 >        self.lumiMask = None
110 >        self.splitByLumi = False
111 >        self.splitDataByEvent = 0
112  
113      def fetchDBSInfo(self):
114          """
# Line 109 | Line 116 | class DataDiscovery:
116          """
117          ## get DBS URL
118          global_url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
119 <        caf_url = "http://cmsdbsprod.cern.ch/cms_dbs_caf_analysis_01/servlet/DBSServlet"
113 <        dbs_url_map  =   {'glite':    global_url,
114 <                          'glite_slc5':global_url,\
115 <                          'glitecoll':global_url,\
116 <                          'condor':   global_url,\
117 <                          'condor_g': global_url,\
118 <                          'glidein':  global_url,\
119 <                          'lsf':      global_url,\
120 <                          'caf':      caf_url,\
121 <                          'sge':      global_url,\
122 <                          'arc':      global_url,\
123 <                          'pbs':      global_url
124 <                          }
125 <
126 <        dbs_url_default = dbs_url_map[(common.scheduler.name()).lower()]
127 <        dbs_url=  self.cfg_params.get('CMSSW.dbs_url', dbs_url_default)
119 >        dbs_url=  self.cfg_params.get('CMSSW.dbs_url', global_url)
120          common.logger.info("Accessing DBS at: "+dbs_url)
121  
122          ## check if runs are selected
123          runselection = []
124          if (self.cfg_params.has_key('CMSSW.runselection')):
125              runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
126 <
126 >            if len(runselection)>1000000:
 127 >                common.logger.info("ERROR: runselection range has more than 1M numbers")
128 >                common.logger.info("ERROR: Too large. runselection is ignored")
129 >                runselection=[]
130 >
131 >        ## check if various lumi parameters are set
132 >        self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
133 >        self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
134 >                          self.cfg_params.get('CMSSW.lumis_per_job',None)
135 >
136 >        lumiList = None
137 >        if self.lumiMask:
138 >            lumiList = LumiList(filename=self.lumiMask)
139 >        if runselection:
140 >            runList = LumiList(runs = runselection)
141  
142          self.splitByRun = int(self.cfg_params.get('CMSSW.split_by_run', 0))
143 <
143 >        self.splitDataByEvent = int(self.cfg_params.get('CMSSW.split_by_event', 0))
144          common.logger.log(10-1,"runselection is: %s"%runselection)
145 +
146 +        if not self.splitByRun:
147 +            self.splitByLumi = self.lumiMask or self.lumiParams or self.ads
148 +
149 +        if self.splitByRun and not runselection:
150 +            msg = "Error: split_by_run must be combined with a runselection"
151 +            raise CrabException(msg)
152 +
153          ## service API
154          args = {}
155          args['url']     = dbs_url
156          args['level']   = 'CRITICAL'
143        args['adshome']   = '$HOME/DBSADS'
157  
158          ## check if has been requested to use the parent info
159          useparent = int(self.cfg_params.get('CMSSW.use_parent',0))
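
The hunk above also settles the splitting mode: lumi-based splitting is enabled whenever a lumi mask, lumi-based job totals, or an analysis dataset path is in play and split_by_run is not requested, while split_by_run itself is only legal together with a runselection. A compact sketch of that decision as a hypothetical standalone helper (the keys mirror the cfg_params used above):

    def choose_split_mode(cfg, is_ads):
        split_by_run   = int(cfg.get('CMSSW.split_by_run', 0))
        split_by_event = int(cfg.get('CMSSW.split_by_event', 0))
        lumi_mask      = cfg.get('CMSSW.lumi_mask', None)
        lumi_params    = cfg.get('CMSSW.total_number_of_lumis', None) or \
                         cfg.get('CMSSW.lumis_per_job', None)

        split_by_lumi = False
        if not split_by_run:
            # A lumi mask, lumi-based totals, or an analysis dataset all
            # force lumi-based splitting.
            split_by_lumi = bool(lumi_mask or lumi_params or is_ads)
        return split_by_run, split_by_lumi, split_by_event
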
# Line 152 | Line 165 | class DataDiscovery:
165          api = DBSAPI.dbsApi.DbsApi(args)
166          self.files = self.queryDbs(api,path=self.datasetPath,runselection=runselection,useParent=useparent)
167  
 168 +        # Determine whether the dataset contains real data or MC
169 +        pdsName = self.datasetPath.split("/")[1]
170 +        primDSs = api.listPrimaryDatasets(pdsName)
171 +        dataType = primDSs[0]['Type']
172 +        common.logger.debug("Datatype is %s" % dataType)
173 +        if dataType == 'data' and not \
174 +            (self.splitByRun or self.splitByLumi or self.splitDataByEvent):
175 +            msg = 'Data must be split by lumi or by run. ' \
176 +                  'Please see crab -help for the correct settings'
177 +            raise  CrabException(msg)
178 +
179 +
180 +
181          anFileBlocks = []
182          if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)
183  
184          # parse files and fill arrays
185          for file in self.files :
186              parList  = []
187 <            lumiList = [] # List of tuples
187 >            fileLumis = [] # List of tuples
188              # skip already analyzed blocks
189              fileblock = file['Block']['Name']
190              if fileblock not in anFileBlocks :
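
The primary-dataset probe added near the top of this hunk enforces the new splitting rules for real data: a dataset whose primary type is 'data' must be split by lumi, run, or event. A minimal sketch of that guard, assuming (as the patch does) that api.listPrimaryDatasets() returns a list of dicts carrying a 'Type' key:

    def check_split_settings(api, dataset_path, split_by_run, split_by_lumi, split_by_event):
        # Primary dataset name is the first path component, e.g. /MinimumBias/...
        pds_name = dataset_path.split("/")[1]
        data_type = api.listPrimaryDatasets(pds_name)[0]['Type']
        if data_type == 'data' and not (split_by_run or split_by_lumi or split_by_event):
            # fetchDBSInfo() raises CrabException here.
            raise RuntimeError('Data must be split by lumi or by run. '
                               'Please see crab -help for the correct settings')
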
# Line 166 | Line 192 | class DataDiscovery:
192                  # asked retry the list of parent for the given child
193                  if useparent==1:
194                      parList = [x['LogicalFileName'] for x in file['ParentList']]
195 <                if self.ads:
196 <                    lumiList = [ (x['RunNumber'], x['LumiSectionNumber'])
195 >                if self.splitByLumi:
196 >                    fileLumis = [ (x['RunNumber'], x['LumiSectionNumber'])
197                                   for x in file['LumiList'] ]
198                  self.parent[filename] = parList
199 <                self.lumis[filename] = lumiList
 199 >                # Intersect the file's lumis with the lumi mask and/or the run selection.
200 >                if self.lumiMask and runselection:
201 >                    self.lumis[filename] = runList.filterLumis(lumiList.filterLumis(fileLumis))
202 >                elif runselection:
203 >                    self.lumis[filename] = runList.filterLumis(fileLumis)
204 >                elif self.lumiMask:
205 >                    self.lumis[filename] = lumiList.filterLumis(fileLumis)
206 >                else:
207 >                    self.lumis[filename] = fileLumis
208                  if filename.find('.dat') < 0 :
209                      events    = file['NumberOfEvents']
210                      # Count number of events and lumis per block
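
The per-file bookkeeping above chains filterLumis() calls, so self.lumis[filename] ends up as the intersection of the file's lumi sections with the run selection and the JSON lumi mask (either filter alone is applied when only one is configured). A small sketch of that composition, with made-up run and lumi numbers:

    try:
        from FWCore.PythonUtilities.LumiList import LumiList
    except ImportError:
        from LumiList import LumiList

    fileLumis = [(163270, 1), (163270, 2), (163286, 7)]      # as read from DBS
    runList   = LumiList(runs=[163270])                      # from CMSSW.runselection
    maskList  = LumiList(lumis=[(163270, 2), (163286, 7)])   # stand-in for the JSON mask

    kept = runList.filterLumis(maskList.filterLumis(fileLumis))
    # kept should be [(163270, 2)], the only pair allowed by both filters.
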
# Line 189 | Line 223 | class DataDiscovery:
223  
224                      # total number of events
225                      self.maxEvents += events
226 <                    self.maxLumis  += len(lumiList)
226 >                    self.maxLumis  += len(self.lumis[filename])
227  
228          if  self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
229              msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
230              raise  CrabException(msg)
231  
198        saveFblocks=''
199        for block in self.eventsPerBlock.keys() :
200            saveFblocks += str(block)+'\n'
201            common.logger.log(10-1,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
202        writeTXTfile(self, fileBlocks_FileName , saveFblocks)
232  
233          if len(self.eventsPerBlock) <= 0:
234              raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
# Line 209 | Line 238 | class DataDiscovery:
238  
239      def queryDbs(self,api,path=None,runselection=None,useParent=None):
240  
241 <        allowedRetriveValue = ['retrive_block', 'retrive_run']
242 <        if self.ads: allowedRetriveValue.append('retrive_lumi')
243 <        if useParent == 1: allowedRetriveValue.append('retrive_parent')
241 >
242 >        allowedRetriveValue = []
243 >        if self.splitByLumi or self.splitByRun or useParent == 1:
244 >            allowedRetriveValue.extend(['retrive_block', 'retrive_run'])
245 >        if self.splitByLumi:
246 >            allowedRetriveValue.append('retrive_lumi')
247 >        if useParent == 1:
248 >            allowedRetriveValue.append('retrive_parent')
249          common.logger.debug("Set of input parameters used for DBS query: %s" % allowedRetriveValue)
250          try:
251 <            if len(runselection) <=0 :
252 <                if useParent==1 or self.splitByRun==1 or self.ads:
219 <                    if self.ads:
220 <                        files = api.listFiles(analysisDataset=path, retriveList=allowedRetriveValue)
221 <                    else :
222 <                        files = api.listFiles(path=path, retriveList=allowedRetriveValue)
223 <                else:
224 <                    files = api.listDatasetFiles(self.datasetPath)
225 <            else :
226 <                files=[]
251 >            if self.splitByRun:
252 >                files = []
253                  for arun in runselection:
254                      try:
255                          if self.ads:
# Line 236 | Line 262 | class DataDiscovery:
262                          common.logger.info(msg)
263                          pass
264  
265 +            else:
266 +                if allowedRetriveValue:
267 +                    if self.ads:
268 +                        files = api.listFiles(analysisDataset=path, retriveList=allowedRetriveValue)
269 +                    else :
270 +                        files = api.listFiles(path=path, retriveList=allowedRetriveValue)
271 +                else:
272 +                    files = api.listDatasetFiles(self.datasetPath)
273 +
274          except DbsBadRequest, msg:
275              raise DataDiscoveryError(msg)
276          except DBSError, msg:

Diff Legend

  (no marker)  Removed lines
  +            Added lines
  <            Changed lines (old revision)
  >            Changed lines (new revision)