ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
(Generate patch)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.21 by spiga, Thu Jun 5 07:13:31 2008 UTC vs.
Revision 1.24 by spiga, Mon Jun 16 10:44:33 2008 UTC

# Line 4 | Line 4 | import DBSAPI.dbsApi
4   from DBSAPI.dbsApiException import *
5   import common
6   from crab_util import *
7 + import os
8  
9  
10   # #######################################
# Line 70 | Line 71 | class NoDataTierinProvenanceError(except
71   # ####################################
72   # class to find and extract info from published data
73   class DataDiscovery:
74 <    def __init__(self, datasetPath, cfg_params):
74 >    def __init__(self, datasetPath, cfg_params, skipAnBlocks):
75  
76          #       Attributes
77          self.datasetPath = datasetPath
78          self.cfg_params = cfg_params
79 +        self.skipBlocks = skipAnBlocks
80  
81          self.eventsPerBlock = {}  # DBS output: map fileblocks-events for collection
82          self.eventsPerFile = {}   # DBS output: map files-events
# Line 107 | Line 109 | class DataDiscovery:
109          args['level']   = 'CRITICAL'
110  
111          ## check if has been requested to use the parent info
110        if (self.cfg_params.has_key('CMSSW.runselection')):
111            runselection = parseRange2(self.cfg_params['CMSSW.runselection'])
112
112          useParent = self.cfg_params.get('CMSSW.use_parent',False)
113 <    
114 <        allowedRetriveValue = [
115 <                        'retrive_child',
116 <                        'retrive_block',
117 <                        'retrive_lumi',
119 <                        'retrive_run'
120 <                        ]
121 <        if useParent:  allowedRetriveValue.append('retrive_parent')
122 <        common.logger.debug(5,"Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
123 <        common.logger.write("Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
113 >
114 >        ## check whether a non-default file has been requested to store/read analyzed fileBlocks
115 >        defaultName = common.work_space.shareDir()+'AnalyzedBlocks.txt'  
116 >        fileBlocks_FileName = os.path.abspath(self.cfg_params.get('CMSSW.fileblocks_file',defaultName))
117 >
118          api = DBSAPI.dbsApi.DbsApi(args)
119 +        allowedRetriveValue = ['retrive_parent',
120 +                               'retrive_block',
121 +                               'retrive_lumi',
122 +                               'retrive_run'
123 +                               ]
124          try:
125              if len(runselection) <= 0 :
126 <                files = api.listFiles(path=self.datasetPath,retriveList=allowedRetriveValue)
126 >                if useParent:
127 >                    files = api.listFiles(path=self.datasetPath, retriveList=allowedRetriveValue)
128 >                    common.logger.debug(5,"Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
129 >                    common.logger.write("Set of input parameters used for DBS query : \n"+str(allowedRetriveValue))
130 >                else:
131 >                    files = api.listDatasetFiles(self.datasetPath)
132              else :
133                  files=[]
134                  for arun in runselection:
# Line 141 | Line 145 | class DataDiscovery:
145          except DBSError, msg:
146              raise DataDiscoveryError(msg)
147  
148 +        anFileBlocks = []
149 +        if self.skipBlocks: anFileBlocks = readTXTfile(self, fileBlocks_FileName)
150 +
151          # parse files and fill arrays
152          for file in files :
153              parList = []
154 <            filename = file['LogicalFileName']
155 <            # asked retry the list of parent for the given child
156 <            if useParent: parList = [x['LogicalFileName'] for x in file['ParentList']]
157 <            self.parent[filename] = parList
158 <            if filename.find('.dat') < 0 :
159 <                fileblock = file['Block']['Name']
160 <                events    = file['NumberOfEvents']
161 <                # number of events per block
162 <                if fileblock in self.eventsPerBlock.keys() :
163 <                    self.eventsPerBlock[fileblock] += events
164 <                else :
165 <                    self.eventsPerBlock[fileblock] = events
166 <                # number of events per file
167 <                self.eventsPerFile[filename] = events
168 <
169 <                # number of events per block
170 <                if fileblock in self.blocksinfo.keys() :
171 <                    self.blocksinfo[fileblock].append(filename)
172 <                else :
173 <                    self.blocksinfo[fileblock] = [filename]
174 <
175 <                # total number of events
176 <                self.maxEvents += events
154 >            # skip already analyzed blocks
155 >            fileblock = file['Block']['Name']
156 >            if fileblock not in anFileBlocks :
157 >                filename = file['LogicalFileName']
158 >                # if requested, retrieve the list of parents for the given child
159 >                if useParent: parList = [x['LogicalFileName'] for x in file['ParentList']]
160 >                self.parent[filename] = parList
161 >                if filename.find('.dat') < 0 :
162 >                    events    = file['NumberOfEvents']
163 >                    # number of events per block
164 >                    if fileblock in self.eventsPerBlock.keys() :
165 >                        self.eventsPerBlock[fileblock] += events
166 >                    else :
167 >                        self.eventsPerBlock[fileblock] = events
168 >                    # number of events per file
169 >                    self.eventsPerFile[filename] = events
170 >            
171 >                    # list of files per block
172 >                    if fileblock in self.blocksinfo.keys() :
173 >                        self.blocksinfo[fileblock].append(filename)
174 >                    else :
175 >                        self.blocksinfo[fileblock] = [filename]
176 >            
177 >                    # total number of events
178 >                    self.maxEvents += events
179 >        if  self.skipBlocks and len(self.eventsPerBlock.keys()) == 0:
180 >            msg = "No new fileblocks available for dataset: "+str(self.datasetPath)
181 >            raise  CrabException(msg)    
182  
183 +        saveFblocks=''
184          for block in self.eventsPerBlock.keys() :
185 +            saveFblocks += str(block)+'\n'
186              common.logger.debug(6,"DBSInfo: total nevts %i in block %s "%(self.eventsPerBlock[block],block))
187 <
187 >        writeTXTfile(self, fileBlocks_FileName , saveFblocks)
188 >                      
189          if len(self.eventsPerBlock) <= 0:
190              raise NotExistingDatasetError(("\nNo data for %s in DBS\nPlease check"
191                                              + " dataset path variables in crab.cfg")

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines