
Comparing COMP/CRAB/python/PubDB.py (file contents):
Revision 1.4 by slacapra, Wed Aug 10 16:52:51 2005 UTC vs.
Revision 1.5 by slacapra, Tue Aug 23 10:38:09 2005 UTC

# Line 10 | Line 10 | class PubDBError:
10      def __init__(self, url):
11          print '\nERROR accessing PubDB at '+url+'\n'
12          pass
13 <
13 >
14 > # ####################################
15 > class PubDBGetAnalysisError:
16 >  def __init__(self, url,Collections):
17 >    print '\nERROR extracting info for collections '+Collections+' from PubDB '+url+'.\n'
18 >    pass
19 >  
20   # ####################################
21   class RefDBmapError:
22      def __init__(self, url):
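
Like PubDBError above, the new PubDBGetAnalysisError prints its diagnostic from __init__, so merely constructing it emits the message; later in this revision (getDatatierCollID) it is also raised as a classic-class exception. A minimal sketch of that pattern, with an invented URL and collection string:

    try:
        raise PubDBGetAnalysisError('http://example-pubdb.cern.ch/', '101-102')
    except PubDBGetAnalysisError:
        pass  # the constructor has already printed the ERROR line
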
# Line 44 | Line 50 | class PubDB:
50          self.owner = owner
51          self.dataset = dataset
52          self.dataTiers = dataTiers
53 +        self.NeededdataTiers=[]
54          self.cfg_params = cfg_params
55      
56          self.RefDBurl_ = 'http://cmsdoc.cern.ch/cms/production/www/'
# Line 63 | Line 70 | class PubDB:
70  
71          CEBlackList = []
72          try:
73 <            tmpBad = string.split(self.cfg_params['USER.ce_black_list'],',')
73 >            tmpBad = string.split(self.cfg_params['EDG.ce_black_list'],',')
74              #tmpBad = ['fnal']
75              for tmp in tmpBad:
76                  tmp=string.strip(tmp)
77 +                if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch              
78                  CEBlackList.append(tmp)
79          except KeyError:
80              pass
81 <        print 'CEBlackList: ',CEBlackList
81 >        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
82          self.reCEBlackList=[]
83          for bad in CEBlackList:
84              self.reCEBlackList.append(re.compile( bad ))
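
For reference, the black-list handling in this hunk reduces to: read a comma-separated option, normalize each entry (including the temporary cnaf-to-webserver substitution), and compile each entry into a regular expression that is later matched against candidate URLs. A self-contained sketch, with an invented cfg_params value and sample URL:

    import re, string

    cfg_params = {'EDG.ce_black_list': 'cnaf, fnal'}  # invented example value
    CEBlackList = []
    try:
        for tmp in string.split(cfg_params['EDG.ce_black_list'], ','):
            tmp = string.strip(tmp)
            if tmp == 'cnaf': tmp = 'webserver'  # same temporary patch as above
            CEBlackList.append(tmp)
    except KeyError:
        pass
    reCEBlackList = [re.compile(bad) for bad in CEBlackList]

    # a URL survives only if no black-list pattern matches it
    url = 'http://webserver.example.it/pubdb/'              # invented example URL
    print not [r for r in reCEBlackList if r.search(url)]   # -> False
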
# Line 98 | Line 106 | class PubDB:
106              ## select the primary collection
107              if first:
108                  NeededCollID.append(coll[0])
109 +                self.NeededdataTiers.append(coll[2])
110                  refdbdataTiers.append(coll[2])
111                  common.logger.message("\n --> primary collection for owner "+self.owner+" is: ID="+coll[0]+" DataTier="+coll[2])
112                  first=0
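
The records looped over here are sequences whose element 0 is the collection ID and element 2 its data tier; the first record is always kept as the primary collection, and the rest are filtered against the data tiers the user asked for. A toy illustration with invented records:

    # invented collection records: (CollID, Owner, DataTier)
    colls = [('101', 'owner', 'DST'), ('99', 'owner', 'Digi'), ('98', 'owner', 'Hit')]
    dataTiers = ['Digi']                               # user request
    NeededCollID = [colls[0][0]]                       # primary collection, always kept
    NeededCollID += [c[0] for c in colls[1:] if c[2] in dataTiers]
    print NeededCollID                                 # -> ['101', '99']
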
# Line 105 | Line 114 | class PubDB:
114                  ## select only the parents collections corresponding to data-tiers requested by the user
115                  if  self.dataTiers.count(coll[2]):
116                      NeededCollID.append(coll[0])
117 +                    self.NeededdataTiers.append(coll[2])
118                      common.logger.message(" --> further collection required: ID="+coll[0]+" DataTier="+coll[2])
119                  refdbdataTiers.append(coll[2])
120            
121          ## check that the user asks for Data Tier really existing in RefDB, otherwise give a warning message
122          for dt in self.dataTiers:
123              if refdbdataTiers.count(dt)<=0:
124 <                msg = "ERROR: Data Tier ( =>",dt,"<= ) not existing for dataset/owner "+ self.dataset+"/"+self.owner+"!"
125 <                msg = msg + "Check the data_tier variable in crab.cfg"
126 <                msg = msg + 'Owner Dataset not published with asked dataTiers! '+\
124 >                msg = "ERROR: Data Tier ( => "+dt+" <= ) not existing for dataset/owner "+self.dataset+"/"+self.owner+"! "
125 >                msg = msg + 'Owner Dataset not published with asked dataTiers! '+\
126                         self.owner+' '+ self.dataset+' '+self.dataTiers
127 <                common.logger.message(msg)
127 >                msg = msg + ' Check the data_tier variable in crab.cfg !\n'
128 >                common.logger.message(msg)
129                  return []
130          
131          #print 'Needed Collections are ', NeededCollID
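
A side note on this hunk: in the removed line, the commas around dt make msg a 3-element tuple rather than a string; the replacement lines build the message by string concatenation instead. The pitfall in two lines (the dt value is invented):

    dt = 'DST'                                      # invented example data tier
    msg = "ERROR: Data Tier ( =>", dt, "<= )"       # the commas build a tuple
    print type(msg)                                 # -> <type 'tuple'>
    msg = "ERROR: Data Tier ( => " + dt + " <= )"   # concatenation keeps it a string
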
# Line 179 | Line 189 | class PubDB:
189                  SelectedPubDBURLs.append(url)
190        
191          #print 'Required Collections',CollIDs,'are all present in PubDBURLs : ',SelectedPubDBURLs,'\n'
182        #return SelectedPubDBURLs
192    ####  check based on CE black list: select only PubDB not in the CE black list  
193          GoodPubDBURLs=self.checkBlackList(SelectedPubDBURLs)
194          return GoodPubDBURLs
# Line 197 | Line 206 | class PubDB:
206   #######################################################################
207      def checkBlackList(self, pubDBUrls):
208          """
209 <        select PubDB URLs that are at site not excluded by the user (via CE black list)
209 >        select PubDB URLs at sites not excluded by the user (via CE black list)
210          """
211          goodurls = []
212          for url in pubDBUrls:
213 <            print 'connecting to the URL ',url
213 >            common.logger.debug(10,'connecting to the URL '+url)
214              good=1
215              for re in self.reCEBlackList:
216                  if re.search(url):
# Line 212 | Line 221 | class PubDB:
221          if len(goodurls) == 0:
222              common.logger.debug(3,"No selected PubDB URLs")
223          return goodurls
224 <  
224 >
225 > ########################################################################
226 >    def checkPubDBNewVersion(self, baseurl):
227 >        """
228 >        Check PubDB version to find out if it's new-style or old-style
229 >        """
230 > ### check based on the existence of pubdb-get-version.php
231 >        urlversion=baseurl+'pubdb-get-version.php'
232 >        newversion=1;
233 >        try:
234 >         v = urllib2.urlopen(urlversion)
235 >        except urllib2.URLError, msg:
236 >          #print "WARNING: no URL to get PubDB version "
237 >          newversion=0;
238 >      
239 >        if (newversion) :
240 >         schemaversion = v.read()
241 >         #print schemaversion;
242 >  
243 >        return newversion
244 >
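The probe above is the whole version test: new-style PubDBs serve pubdb-get-version.php, old-style ones answer with a URL error. The same pattern as a stand-alone function (the base URL is invented; urllib2 is the library the method itself uses):

    import urllib2

    def is_new_style(baseurl):
        # new-style PubDBs serve this script; old-style ones do not
        try:
            v = urllib2.urlopen(baseurl + 'pubdb-get-version.php')
        except urllib2.URLError:
            return 0
        schemaversion = v.read()   # the schema version, if ever needed
        return 1

    print is_new_style('http://example-pubdb.cern.ch/')   # invented URL
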
245   ########################################################################
246 <    def getPubDBData(self, CollIDs, url):
246 >    def getPubDBData(self, CollIDs, url, newversion):
247          """
248           Contact a PubDB to collect all the relevant information
249          """
250          result = []
251 <        for CollID in CollIDs:
252 <            end=string.rfind(url,'/')
253 <            lastEq=string.rfind(url,'=')
251 >        
252 > ### get the base PubDb url
253 >        end=string.rfind(url,'/')
254 >        lastEq=string.rfind(url,'=')
255 >
256 >        if (newversion) :
257 > ### from PubDB V4 : get info for all the collections in one shot and unserialize the content
258 >           Collections=string.join(CollIDs,'-')
259 >           ## add the PU among the required collections if the Digi are requested
260 >           # (for the time being the PU is asked directly to the PubDB, so the
261 >           # RefDB-level data discovery is bypassed; once every site runs the new
262 >           # style, the PU can be asked for at RefDB level in method findAllCollections)
263 >           if ( self.NeededdataTiers.count('Digi') ):
264 >             PUCollID=self.getDatatierCollID(url[:end+1],Collections,"PU")
265 >             if (PUCollID) : CollIDs.append(PUCollID)
266 >           ##
267 >           Collections=string.join(CollIDs,'-')
268 >           ### download from PubDB all the info about the given collections
269 >           pubdb_analysis=PubDBInfo(url[:end+1],Collections)
270 >           #print pubdb_analysis.GetPubDBInfo()
271 >           ok=0
272 >           try:
273 >             catInfos=pubdb_analysis.GetPubDBInfo()
274 >             ok=1
275 >           except :
276 >             #print "WARNING: can't get PubDB content out of "+url[:end+1]+"\n"
277 >             print '\nERROR extracting info for collections '+Collections+' from PubDB '+url[:end+1]+'. The PU might not be published at that site.\n'
278 >             #raise PubDBGetAnalysisError(url[:end+1],Collections)  
279 >           if (ok): result=catInfos;
280 >
281 >        else:
282 >
283 > ### before PubDB V4 : get info for each collection and read the key-value pair text
284 >              
285 >          for CollID in CollIDs:
286              urlphp=url[:end+1]+self.PubDBAnalysisPhp_+'?CollID='+CollID
287              # print 'PHP URL: '+urlphp+' \n'
288 <          
288 >
289              reOld=re.compile( r'V24' )
290              #print urlphp,'Old PubDB ',reOld.search(urlphp)
291              if reOld.search(urlphp):
# Line 249 | Line 310 | class PubDB:
310          #     r.dump()
311          #print '.....'
312          return result
313 <  
313 >
314 > ########################################################################
315 >    def getDatatierCollID(self,urlbase,CollIDString,datatier):
316 >        """
317 >        Contact a PubDB script to retrieve the CollID of a DataTier
318 >        """
319 >        try:
320 >          f = urllib.urlopen(urlbase+'pubdb-get-collidbydatatier.php?collid='+CollIDString+"&datatier="+datatier)
321 >        except IOError:
322 >          raise PubDBGetAnalysisError(urlbase+'pubdb-get-collidbydatatier.php',CollIDString)
323 >        data = f.read()
324 >        colldata=re.compile(r'collid=(\S*)').search(data);
325 >        if colldata:
326 >           datatier_CollID=colldata.group(1)
327 > #           print " --> asking PubDB "+urlbase+" for an additional collection : ID= "+datatier_CollID+" DataTier= "+datatier
328 >           common.logger.message(" --> asking PubDB "+urlbase+" for an additional collection : ID= "+datatier_CollID+" DataTier= "+datatier)
329 >
330 >           return datatier_CollID      
331 >
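The reply parsing in getDatatierCollID is a single regular-expression capture over the key=value text returned by the script. A sketch with a fabricated reply body:

    import re

    data = 'collid=105\n'       # fabricated pubdb-get-collidbydatatier.php reply
    colldata = re.compile(r'collid=(\S*)').search(data)
    if colldata:
        print colldata.group(1)   # -> 105
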
332   ########################################################################
333 <    def getAllPubDBData(self, CollIDs, urllist):
333 >    def getAllPubDBData(self):
334          """
335           Contact a list of PubDB to collect all the relevant information
336          """
337 <        completeResult=[]
337 >        newPubDBResult=[]
338 >        oldPubDBResult=[]
339 >        Result={}
340 >
341 > ### find the user-required collection IDs
342 >        CollIDs = self.findAllCollections()
343 > ### find the PubDB URLs publishing the needed data
344 >        urllist = self.findPubDBs(CollIDs)
345 > ### collect information separately from new-style PubDBs and old-style PubDBs
346          for pubdburl in urllist:
347 <            completeResult.append(self.getPubDBData(CollIDs,pubdburl))
348 <        
347 >            end=string.rfind(pubdburl,'/')
348 >            newversion=self.checkPubDBNewVersion(pubdburl[:end+1])
349 >            if (newversion):
350 >              res=self.getPubDBData(CollIDs,pubdburl,newversion)
351 >              if len(res)>0:
352 >               newPubDBResult.append(res)
353 >            else:
354 >              resold=self.getPubDBData(CollIDs,pubdburl,newversion)
355 >              if len(resold)>0:
356 >               oldPubDBResult.append(resold)
357 > ### fill a dictionary with all the PubDB results, both old-style and new-style
358 >        Result['newPubDB']=newPubDBResult
359 >        Result['oldPubDB']=oldPubDBResult
360 >
361          ## print for debugging purpose
362 <        #for result in completeResult:
363 <        #   print '..... PubDB Site URL :',pubdburl
364 <        #   for r in result:
365 <        #      r.dump()
366 <        #   print '.....................................'
367 <        
368 <        return completeResult
362 >        #
363 >        #for PubDBversion in Result.keys():
364 >            #print ("key %s, val %s" %(PubDBversion,Result[PubDBversion]))
365 >        #    if len(Result[PubDBversion])>0 :
366 >               #print (" key %s"%(PubDBversion))
367 >        #       for result in Result[PubDBversion]:
368 >        #          for r in result:
369 >                      #r.dump()
370 >        #              common.log.write('----------------- \n')
371 >              #print '.....................................'
372 >
373 >        return Result
374 >
375   ####################################################################
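
With this change getAllPubDBData takes no arguments (it now calls findAllCollections and findPubDBs itself) and returns a dictionary keyed by PubDB style instead of a flat list. A hypothetical caller would unpack it as follows (pubdb stands for an already-constructed PubDB instance):

    # hypothetical usage; PubDB construction omitted
    Result = pubdb.getAllPubDBData()
    for catInfos in Result['newPubDB']:    # one entry per new-style PubDB
        pass                               # unserialized catalogue info
    for collInfos in Result['oldPubDB']:   # one entry per old-style PubDB
        pass                               # per-collection key-value records
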

Diff Legend

  Removed lines
+ Added lines
< Changed lines (old revision)
> Changed lines (new revision)