ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
Revision: 1.25
Committed: Thu Jul 3 23:02:38 2008 UTC (16 years, 9 months ago) by spiga
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_4_4, CRAB_2_4_4_pre6, CRAB_2_4_4_pre5, CRAB_2_4_4_pre4, CRAB_2_4_4_pre3, CRAB_2_4_4_pre2, CRAB_2_4_4_pre1, CRAB_2_4_3, CRAB_2_4_3_pre8, CRAB_2_4_3_pre7, CRAB_2_4_3_pre6, CRAB_2_4_3_pre5, CRAB_2_4_3_pre3, CRAB_2_4_3_pre2, CRAB_2_4_3_pre1, CRAB_2_4_2, CRAB_2_4_2_pre3, CRAB_2_4_2_pre2, CRAB_2_4_2_pre1, CRAB_2_4_1, CRAB_2_4_1_pre4, CRAB_2_4_1_pre3, CRAB_2_4_1_pre2, CRAB_2_4_1_pre1, CRAB_2_4_0_Tutorial, CRAB_2_4_0_Tutorial_pre1, CRAB_2_4_0, CRAB_2_4_0_pre9, CRAB_2_4_0_pre8, CRAB_2_4_0_pre7, CRAB_2_4_0_pre6, CRAB_2_4_0_pre5, CRAB_2_4_0_pre4, CRAB_2_4_0_pre3, CRAB_2_4_0_pre2, CRAB_2_4_0_pre1, CRAB_DLS_PHED1, CRAB_DLS_PHED, CRAB_2_3_2_Fnal, CRAB_2_3_2, CRAB_2_3_2_pre7, CRAB_2_3_2_pre5, CRAB_2_3_2_pre4, CRAB_2_3_2_pre3, CRAB_2_3_2_pre2, CRAB_2_3_2_pre1, CRAB_2_4_0_test
Branch point for: AnaDataSet
Changes since 1.24: +13 -4 lines
Log Message:
Added support for default DBS URL (global, caf, ...)

File Contents

# User Rev Content
1 gutsche 1.6 #!/usr/bin/env python
2 slacapra 1.18 import exceptions
3     import DBSAPI.dbsApi
4     from DBSAPI.dbsApiException import *
5     import common
6     from crab_util import *
7 spiga 1.22 import os
8 afanfani 1.1
9 afanfani 1.3
10 slacapra 1.18 # #######################################
class DBSError(Exception):
    """
    Error reported while talking to DBS.

    The exception message is built as
    '\\nERROR DBS <errorName> : <errorMessage> \\n'.
    """

    def __init__(self, errorName, errorMessage):
        """
        errorName    -- short name of the DBS error
        errorMessage -- descriptive text of the DBS error
        """
        # The builtin Exception is the same object as exceptions.Exception
        # on Python 2, and does not need the Python-2-only 'exceptions' module.
        message = '\nERROR DBS %s : %s \n' % (errorName, errorMessage)
        Exception.__init__(self, message)

    def getErrorMessage(self):
        """ Return error message """
        # Format the stored message explicitly instead of relying on
        # self.args being a one-element tuple.
        return "%s" % (self.args[0],)
20    
21     # #######################################
class DBSInvalidDataTierError(Exception):
    """
    Error reported by DBS for an invalid data tier.

    The exception message is built as
    '\\nERROR DBS <errorName> : <errorMessage> \\n'.
    """

    def __init__(self, errorName, errorMessage):
        """
        errorName    -- short name of the DBS error
        errorMessage -- descriptive text of the DBS error
        """
        # The builtin Exception is the same object as exceptions.Exception
        # on Python 2, and does not need the Python-2-only 'exceptions' module.
        message = '\nERROR DBS %s : %s \n' % (errorName, errorMessage)
        Exception.__init__(self, message)

    def getErrorMessage(self):
        """ Return error message """
        # Format the stored message explicitly instead of relying on
        # self.args being a one-element tuple.
        return "%s" % (self.args[0],)
31    
32     # #######################################
class DBSInfoError:
    """
    Report a failure to access a DBS url.

    Creating an instance prints the error text to standard output.

    NOTE(review): despite its name this class does not derive from
    Exception; it is left that way to keep the existing interface.
    """

    def __init__(self, url):
        """
        url -- the DBS url that could not be accessed (a string)
        """
        # Single-argument parenthesised form: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print('\nERROR accessing DBS url : '+url+'\n')
37    
38 afanfani 1.1 # ####################################
class DataDiscoveryError(Exception):
    """
    Generic failure of the data discovery step.
    """

    def __init__(self, errorMessage):
        """
        errorMessage -- text (or underlying exception) describing the failure
        """
        # Do not assign self.args directly: on new-style exceptions the
        # assignment converts a string into a tuple of its characters (and
        # fails for non-iterable values), which corrupts the message.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
48 afanfani 1.3
49 afanfani 1.1 # ####################################
class NotExistingDatasetError(Exception):
    """
    Raised when no data is found for the requested dataset.
    """

    def __init__(self, errorMessage):
        """
        errorMessage -- text describing the failure
        """
        # Do not assign self.args directly: on new-style exceptions the
        # assignment converts a string into a tuple of its characters (and
        # fails for non-iterable values), which corrupts the message.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
59 afanfani 1.1
60     # ####################################
class NoDataTierinProvenanceError(Exception):
    """
    Raised when a data tier is missing from the dataset provenance.

    NOTE(review): meaning inferred from the class name only; the class is
    not raised anywhere in the visible part of this file.
    """

    def __init__(self, errorMessage):
        """
        errorMessage -- text describing the failure
        """
        # Do not assign self.args directly: on new-style exceptions the
        # assignment converts a string into a tuple of its characters (and
        # fails for non-iterable values), which corrupts the message.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args[0],)
70 afanfani 1.1
71     # ####################################
72     # class to find and extract info from published data
class DataDiscovery:
    """
    Find and extract information about published data by querying DBS.

    After a successful fetchDBSInfo() call the instance holds, for the
    configured dataset path: the number of events per fileblock and per
    file, the files belonging to each fileblock, the parents of each file
    and the total number of events.
    """

    # DBS instances used when 'CMSSW.dbs_url' is not set in the configuration.
    _GLOBAL_DBS_URL = "http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
    _CAF_DBS_URL = "http://cmsdbsprod.cern.ch/cms_dbs_caf_analysis_01/servlet/DBSServlet"
    # Schedulers whose default DBS instance differs from the global one;
    # every other scheduler uses _GLOBAL_DBS_URL.
    _DBS_URL_BY_SCHEDULER = {'caf': _CAF_DBS_URL}

    def __init__(self, datasetPath, cfg_params, skipAnBlocks):
        """
        datasetPath  -- dataset path passed to the DBS queries
        cfg_params   -- mapping of configuration keys (e.g. 'CMSSW.dbs_url')
        skipAnBlocks -- when true, fileblocks listed in the analyzed-blocks
                        file are skipped by fetchDBSInfo()
        """
        # Attributes
        self.datasetPath = datasetPath
        self.cfg_params = cfg_params
        self.skipBlocks = skipAnBlocks

        self.eventsPerBlock = {}  # DBS output: map fileblocks-events for collection
        self.eventsPerFile = {}   # DBS output: map files-events
        self.blocksinfo = {}      # DBS output: map fileblocks-files
        self.maxEvents = 0        # DBS output: max events
        self.parent = {}          # DBS output: map files-parent files

    # ####################################
    def _defaultDbsUrl(self, schedulerName):
        """
        Return the DBS url used by default for the given scheduler name.

        The lookup is case-insensitive; schedulers without a dedicated
        entry fall back to the global DBS instance instead of failing.
        """
        return self._DBS_URL_BY_SCHEDULER.get(schedulerName.lower(),
                                              self._GLOBAL_DBS_URL)

    # ####################################
    def _addFile(self, fileinfo, useParent):
        """
        Record one file entry returned by DBS in the result maps.

        fileinfo  -- mapping with 'Block' -> 'Name', 'LogicalFileName',
                     'NumberOfEvents' and, when useParent is true,
                     'ParentList' (entries with 'LogicalFileName')
        useParent -- when true, store the parent file names of the file
        """
        fileblock = fileinfo['Block']['Name']
        filename = fileinfo['LogicalFileName']

        # list of parents for the given child (empty unless requested)
        parList = []
        if useParent:
            parList = [x['LogicalFileName'] for x in fileinfo['ParentList']]
        self.parent[filename] = parList

        # files whose name contains '.dat' only get a parent entry:
        # they do not contribute to any of the event counts below
        if filename.find('.dat') >= 0:
            return

        events = fileinfo['NumberOfEvents']
        # number of events per block
        self.eventsPerBlock[fileblock] = self.eventsPerBlock.get(fileblock, 0) + events
        # number of events per file
        self.eventsPerFile[filename] = events
        # files per block
        self.blocksinfo.setdefault(fileblock, []).append(filename)
        # total number of events
        self.maxEvents += events

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS and fill the per-block / per-file maps of this object.

        Reads from cfg_params: 'CMSSW.dbs_url', 'CMSSW.runselection',
        'CMSSW.use_parent' and 'CMSSW.fileblocks_file'. The names of the
        fileblocks found are written to the fileblocks file.

        Raises:
          DataDiscoveryError      -- the DBS query raised DbsBadRequest or DBSError
          CrabException           -- block skipping is enabled and no fileblock
                                     with events was collected
          NotExistingDatasetError -- no fileblock with events was collected
        """
        # Local import: only needed to read the active exception in a way
        # that is valid on every Python version.
        import sys

        ## get DBS URL: an explicit 'CMSSW.dbs_url' wins over the scheduler default
        dbs_url_default = self._defaultDbsUrl(common.scheduler.name())
        dbs_url = self.cfg_params.get('CMSSW.dbs_url', dbs_url_default)
        common.logger.debug(3,"Accessing DBS at: "+dbs_url)

        ## check if runs are selected
        runselection = []
        if 'CMSSW.runselection' in self.cfg_params:
            runselection = parseRange2(self.cfg_params['CMSSW.runselection'])

        # wrap in a tuple so that any sequence type is formatted as one value
        common.logger.debug(6,"runselection is: %s" % (runselection,))

        ## service API
        args = {}
        args['url'] = dbs_url
        args['level'] = 'CRITICAL'

        ## check if has been requested to use the parent info
        # NOTE(review): only the truthiness of the value is used, so a
        # non-empty string such as '0' would enable it -- confirm how
        # cfg_params values are typed.
        useParent = self.cfg_params.get('CMSSW.use_parent',False)

        ## check if has been asked for a non default file to store/read analyzed fileBlocks
        defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'
        fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))

        api = DBSAPI.dbsApi.DbsApi(args)
        allowedRetriveValue = ['retrive_parent',
                               'retrive_block',
                               'retrive_lumi',
                               'retrive_run'
                               ]
        try:
            if not runselection:
                if useParent:
                    files = api.listFiles(path=self.datasetPath, retriveList=allowedRetriveValue)
                    common.logger.debug(5,"Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
                    common.logger.write("Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
                else:
                    files = api.listDatasetFiles(self.datasetPath)
            else:
                files = []
                for arun in runselection:
                    try:
                        filesinrun = api.listFiles(path=self.datasetPath,retriveList=allowedRetriveValue,runNumber=arun)
                        files.extend(filesinrun)
                    except Exception:
                        # best effort: a failing run is reported and skipped,
                        # the remaining runs are still queried
                        msg = "WARNING: problem extracting info from DBS for run %s "%arun
                        common.logger.message(msg)
        except (DbsBadRequest, DBSError):
            # wrap the DBS failure, passing the original exception as message
            raise DataDiscoveryError(sys.exc_info()[1])

        anFileBlocks = []
        if self.skipBlocks:
            anFileBlocks = readTXTfile(self, fileBlocks_FileName)

        # parse files and fill arrays
        for fileinfo in files:
            # skip already analyzed blocks
            if fileinfo['Block']['Name'] not in anFileBlocks:
                self._addFile(fileinfo, useParent)

        if self.skipBlocks and len(self.eventsPerBlock) == 0:
            msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
            raise CrabException(msg)

        # save the names of the fileblocks found, one per line
        saveFblocks = ''
        for block in self.eventsPerBlock.keys():
            saveFblocks += str(block)+'\n'
            common.logger.debug(6,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
        writeTXTfile(self, fileBlocks_FileName , saveFblocks)

        if len(self.eventsPerBlock) <= 0:
            raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
                                           + " dataset path variables in crab.cfg")
                                          % self.datasetPath)

    # #################################################
    def getMaxEvents(self):
        """
        max events
        """
        return self.maxEvents

    # #################################################
    def getEventsPerBlock(self):
        """
        list the event collections structure by fileblock
        """
        return self.eventsPerBlock

    # #################################################
    def getEventsPerFile(self):
        """
        list the event collections structure by file
        """
        return self.eventsPerFile

    # #################################################
    def getFiles(self):
        """
        return files grouped by fileblock
        """
        return self.blocksinfo

    # #################################################
    def getParent(self):
        """
        return parent grouped by file
        """
        return self.parent
238    
239 afanfani 1.1 ########################################################################