ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
(Generate patch)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.17 by afanfani, Thu Feb 22 14:22:13 2007 UTC vs.
Revision 1.23 by spiga, Mon Jun 9 17:50:45 2008 UTC

# Line 1 | Line 1
1   #!/usr/bin/env python
2 < from DBSInfo import *
2 > import exceptions
3 > import DBSAPI.dbsApi
4 > from DBSAPI.dbsApiException import *
5 > import common
6 > from crab_util import *
7 > import os
8  
9  
10 + # #######################################
11 + class DBSError(exceptions.Exception):
12 +    def __init__(self, errorName, errorMessage):
13 +        args='\nERROR DBS %s : %s \n'%(errorName,errorMessage)
14 +        exceptions.Exception.__init__(self, args)
15 +        pass
16 +    
17 +    def getErrorMessage(self):
18 +        """ Return error message """
19 +        return "%s" % (self.args)
20 +
21 + # #######################################
22 + class DBSInvalidDataTierError(exceptions.Exception):
23 +    def __init__(self, errorName, errorMessage):
24 +        args='\nERROR DBS %s : %s \n'%(errorName,errorMessage)
25 +        exceptions.Exception.__init__(self, args)
26 +        pass
27 +    
28 +    def getErrorMessage(self):
29 +        """ Return error message """
30 +        return "%s" % (self.args)
31 +
32 + # #######################################
33 + class DBSInfoError:
34 +    def __init__(self, url):
35 +        print '\nERROR accessing DBS url : '+url+'\n'
36 +        pass
37 +
38   # ####################################
39   class DataDiscoveryError(exceptions.Exception):
40      def __init__(self, errorMessage):
# Line 38 | Line 71 | class NoDataTierinProvenanceError(except
71   # ####################################
72   # class to find and extract info from published data
73   class DataDiscovery:
74 <    def __init__(self, datasetPath, cfg_params):
74 >    def __init__(self, datasetPath, cfg_params, skipAnBlocks):
75  
76 < #       Attributes
76 >        #       Attributes
77          self.datasetPath = datasetPath
78          self.cfg_params = cfg_params
79 +        self.skipBlocks = skipAnBlocks
80  
81          self.eventsPerBlock = {}  # DBS output: map fileblocks-events for collection
82          self.eventsPerFile = {}   # DBS output: map files-events
83 <        self.blocksinfo = {}  # DBS output: map fileblocks-files
84 < #DBS output: max events computed by method getMaxEvents
83 >        self.blocksinfo = {}      # DBS output: map fileblocks-files
84 >        self.maxEvents = 0        # DBS output: max events
85 >        self.parent = {}       # DBS output: map files-parents
86  
87   # ####################################
88      def fetchDBSInfo(self):
# Line 56 | Line 91 | class DataDiscovery:
91          """
92  
93          ## get DBS URL
94 <        try:
94 >        dbs_url="http://cmsdbsprod.cern.ch/cms_dbs_prod_global/servlet/DBSServlet"
95 >        if (self.cfg_params.has_key('CMSSW.dbs_url')):
96              dbs_url=self.cfg_params['CMSSW.dbs_url']
61        except KeyError:
62            dbs_url="http://cmsdbs.cern.ch/cms/prod/comp/DBS/CGIServer/prodquery"
97  
98 <        ## get info about the requested dataset
99 <        try:
100 <            dbs_instance=self.cfg_params['CMSSW.dbs_instance']
101 <        except KeyError:
102 <            dbs_instance="MCGlobal/Writer"
98 >        common.logger.debug(3,"Accessing DBS at: "+dbs_url)
99 >
100 >        ## check if runs are selected
101 >        runselection = []
102 >        if (self.cfg_params.has_key('CMSSW.runselection')):
103 >            runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
104 >
105 >        common.logger.debug(6,"runselection is: %s"%runselection)
106 >        ## service API
107 >        args = {}
108 >        args['url']     = dbs_url
109 >        args['level']   = 'CRITICAL'
110 >
111 >        ## check whether use of the parent info has been requested
112 >        useParent = self.cfg_params.get('CMSSW.use_parent',False)
113 >
114 >        ## check whether a non-default file has been requested to store/read the analyzed fileBlocks
115 >        defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'  
116 >        fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))
117  
118 <        dbs = DBSInfo(dbs_url, dbs_instance)
118 >        api = DBSAPI.dbsApi.DbsApi(args)
119          try:
120 <            self.datasets = dbs.getMatchingDatasets(self.datasetPath)
121 <        except DBSAPIOLD.dbsCgiApi.DbsCgiExecutionError, msg:
120 >            if len(runselection) <= 0 :
121 >                if useParent:
122 >                    allowedRetriveValue = ['retrive_parent',
123 >                                           'retrive_block',
124 >                                           'retrive_lumi',
125 >                                           'retrive_run'
126 >                                           ]
127 >                    files = api.listFiles(path=self.datasetPath, retriveList=allowedRetriveValue)
128 >                    common.logger.debug(5,"Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
129 >                    common.logger.write("Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
130 >                else:
131 >                    files = api.listDatasetFiles(self.datasetPath)
132 >            else :
133 >                files=[]
134 >                for arun in runselection:
135 >                    try:
136 >                        filesinrun = api.listFiles(path=self.datasetPath,retriveList=allowedRetriveValue,runNumber=arun)
137 >                        files.extend(filesinrun)
138 >                    except:
139 >                        msg="WARNING: problem extracting info from DBS for run %s "%arun
140 >                        common.logger.message(msg)
141 >                        pass
142 >
143 >        except DbsBadRequest, msg:
144              raise DataDiscoveryError(msg)
145          except DBSError, msg:
146              raise DataDiscoveryError(msg)
147  
148 <        if len(self.datasets) == 0:
149 <            raise DataDiscoveryError("DatasetPath=%s unknown to DBS" %self.datasetPath)
80 <        if len(self.datasets) > 1:
81 <            raise DataDiscoveryError("DatasetPath=%s is ambiguous" %self.datasetPath)
82 <
83 <        try:
84 <            self.dbsdataset = self.datasets[0].get('datasetPathName')
148 >        anFileBlocks = []
149 >        if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)
150  
151 <            self.eventsPerBlock = dbs.getEventsPerBlock(self.dbsdataset)
152 <            self.blocksinfo = dbs.getDatasetFileBlocks(self.dbsdataset)
153 <            self.eventsPerFile = dbs.getEventsPerFile(self.dbsdataset)
154 <        except DBSError, ex:
155 <            raise DataDiscoveryError(ex.getErrorMessage())
156 <        
151 >        # parse files and fill arrays
152 >        for file in files :
153 >            parList = []
154 >            # skip already analyzed blocks
155 >            fileblock = file['Block']['Name']
156 >            if fileblock not in anFileBlocks :
157 >                filename = file['LogicalFileName']
158 >                # if requested, retrieve the list of parents for the given child
159 >                if useParent: parList = [x['LogicalFileName'] for x in file['ParentList']]
160 >                self.parent[filename] = parList
161 >                if filename.find('.dat') < 0 :
162 >                    events    = file['NumberOfEvents']
163 >                    # number of events per block
164 >                    if fileblock in self.eventsPerBlock.keys() :
165 >                        self.eventsPerBlock[fileblock] += events
166 >                    else :
167 >                        self.eventsPerBlock[fileblock] = events
168 >                    # number of events per file
169 >                    self.eventsPerFile[filename] = events
170 >            
171 >                    # list of files per block
172 >                    if fileblock in self.blocksinfo.keys() :
173 >                        self.blocksinfo[fileblock].append(filename)
174 >                    else :
175 >                        self.blocksinfo[fileblock] = [filename]
176 >            
177 >                    # total number of events
178 >                    self.maxEvents += events
179 >        if  self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
180 >            msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
181 >            raise  CrabException(msg)    
182 >
183 >        saveFblocks=''
184 >        for block in self.eventsPerBlock.keys() :
185 >            saveFblocks += str(block)+'\n'
186 >            common.logger.debug(6,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
187 >        writeTXTfile(self, fileBlocks_FileName , saveFblocks)
188 >                      
189          if len(self.eventsPerBlock) <= 0:
190 <            raise NotExistingDatasetError (("\nNo data for %s in DBS\nPlease check"
190 >            raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
191                                              + " dataset path variables in crab.cfg")
192 <                                            % self.dbsdataset)
192 >                                            % self.datasetPath)
193  
194  
195   # #################################################
# Line 100 | Line 197 | class DataDiscovery:
197          """
198          max events
199          """
200 <        ## loop over the event collections
104 <        nevts=0      
105 <        for evc_evts in self.eventsPerBlock.values():
106 <            nevts=nevts+evc_evts
107 <
108 <        return nevts
200 >        return self.maxEvents
201  
202   # #################################################
203      def getEventsPerBlock(self):
# Line 128 | Line 220 | class DataDiscovery:
220          """
221          return self.blocksinfo        
222  
223 + # #################################################
224 +    def getParent(self):
225 +        """
226 +        return parent grouped by file
227 +        """
228 +        return self.parent        
229 +
230   ########################################################################

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines