ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
(Generate patch)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.19 by slacapra, Fri Jan 4 17:30:56 2008 UTC vs.
Revision 1.23 by spiga, Mon Jun 9 17:50:45 2008 UTC

# Line 4 | Line 4 | import DBSAPI.dbsApi
4   from DBSAPI.dbsApiException import *
5   import common
6   from crab_util import *
7 + import os
8  
9  
10   # #######################################
# Line 70 | Line 71 | class NoDataTierinProvenanceError(except
71   # ####################################
72   # class to find and extract info from published data
73   class DataDiscovery:
74 <    def __init__(self, datasetPath, cfg_params):
74 >    def __init__(self, datasetPath, cfg_params, skipAnBlocks):
75  
76          #       Attributes
77          self.datasetPath = datasetPath
78          self.cfg_params = cfg_params
79 +        self.skipBlocks = skipAnBlocks
80  
81          self.eventsPerBlock = {}  # DBS output: map fileblocks-events for collection
82          self.eventsPerFile = {}   # DBS output: map files-events
83          self.blocksinfo = {}      # DBS output: map fileblocks-files
84          self.maxEvents = 0        # DBS output: max events
85 +        self.parent = {}       # DBS output: map files-parent files
86  
87   # ####################################
88      def fetchDBSInfo(self):
# Line 99 | Line 102 | class DataDiscovery:
102          if (self.cfg_params.has_key('CMSSW.runselection')):
103              runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
104  
105 +        common.logger.debug(6,"runselection is: %s"%runselection)
106          ## service API
107          args = {}
108          args['url']     = dbs_url
109          args['level']   = 'CRITICAL'
110  
111 +        ## check if has been requested to use the parent info
112 +        useParent = self.cfg_params.get('CMSSW.use_parent',False)
113 +
114 +        ## check if has been asked for a non default file to store/read analyzed fileBlocks  
115 +        defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'  
116 +        fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))
117 +
118          api = DBSAPI.dbsApi.DbsApi(args)
119          try:
120              if len(runselection) <= 0 :
121 <                files = api.listDatasetFiles(self.datasetPath)
121 >                if useParent:
122 >                    allowedRetriveValue = ['retrive_parent',
123 >                                           'retrive_block',
124 >                                           'retrive_lumi',
125 >                                           'retrive_run'
126 >                                           ]
127 >                    files = api.listFiles(path=self.datasetPath, retriveList=allowedRetriveValue)
128 >                    common.logger.debug(5,"Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
129 >                    common.logger.write("Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
130 >                else:
131 >                    files = api.listDatasetFiles(self.datasetPath)
132              else :
133 <                files = api.listFiles(path=self.datasetPath, details=True)
133 >                files=[]
134 >                for arun in runselection:
135 >                    try:
136 >                        filesinrun = api.listFiles(path=self.datasetPath,retriveList=allowedRetriveValue,runNumber=arun)
137 >                        files.extend(filesinrun)
138 >                    except:
139 >                        msg="WARNING: problem extracting info from DBS for run %s "%arun
140 >                        common.logger.message(msg)
141 >                        pass
142 >
143          except DbsBadRequest, msg:
144              raise DataDiscoveryError(msg)
145          except DBSError, msg:
146              raise DataDiscoveryError(msg)
147  
148 +        anFileBlocks = []
149 +        if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)
150 +
151          # parse files and fill arrays
152          for file in files :
153 <            filename = file['LogicalFileName']
154 <            if filename.find('.dat') < 0 :
155 <                fileblock = file['Block']['Name']
156 <                events    = file['NumberOfEvents']
157 <                continue_flag = 0
158 <                if len(runselection) > 0 :
159 <                    runslist = file['RunsList']
160 <                    for run in runslist :
161 <                        runnumber = run['RunNumber']
162 <                        for selected_run in runselection :
130 <                            if runnumber == selected_run :
131 <                                continue_flag = 1
132 <                else :
133 <                    continue_flag = 1
134 <
135 <                if continue_flag == 1 :
153 >            parList = []
154 >            # skip already analyzed blocks
155 >            fileblock = file['Block']['Name']
156 >            if fileblock not in anFileBlocks :
157 >                filename = file['LogicalFileName']
158 >                # if requested, retrieve the list of parents for the given child
159 >                if useParent: parList = [x['LogicalFileName'] for x in file['ParentList']]
160 >                self.parent[filename] = parList
161 >                if filename.find('.dat') < 0 :
162 >                    events    = file['NumberOfEvents']
163                      # number of events per block
164                      if fileblock in self.eventsPerBlock.keys() :
165                          self.eventsPerBlock[fileblock] += events
166                      else :
167                          self.eventsPerBlock[fileblock] = events
141
168                      # number of events per file
169                      self.eventsPerFile[filename] = events
170 <
170 >            
171                      # list of files per block
172                      if fileblock in self.blocksinfo.keys() :
173                          self.blocksinfo[fileblock].append(filename)
174                      else :
175                          self.blocksinfo[fileblock] = [filename]
176 <
176 >            
177                      # total number of events
178                      self.maxEvents += events
179 +        if  self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
180 +            msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
181 +            raise  CrabException(msg)    
182  
183 +        saveFblocks=''
184          for block in self.eventsPerBlock.keys() :
185 +            saveFblocks += str(block)+'\n'
186              common.logger.debug(6,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
187 <
187 >        writeTXTfile(self, fileBlocks_FileName , saveFblocks)
188 >                      
189          if len(self.eventsPerBlock) <= 0:
190              raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
191                                              + " dataset path variables in crab.cfg")
# Line 188 | Line 220 | class DataDiscovery:
220          """
221          return self.blocksinfo        
222  
223 + # #################################################
224 +    def getParent(self):
225 +        """
226 +        return parent grouped by file
227 +        """
228 +        return self.parent        
229 +
230   ########################################################################

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines