CRAB/python/PubDB.py

#!/usr/bin/env python2
import sys, os, string, re
import urllib, urllister
import urllib2
import common
from RefDBInfo import RefDBInfo
from PubDBInfo import *

# ####################################
class PubDBError(Exception):
    """
    Error raised when a PubDB page cannot be accessed.

    url -- the address that failed; also stored as self.url

    The error banner is still printed when the exception is built, as
    before; in addition the text is now carried by the exception itself.
    """
    def __init__(self, url):
        message = '\nERROR accessing PubDB at '+url+'\n'
        # deriving from Exception makes the text visible in tracebacks
        # and lets generic 'except Exception' handlers see this error
        Exception.__init__(self, message)
        self.url = url
        print(message)

# ####################################
class PubDBGetAnalysisError(Exception):
    """
    Error raised when the information about some collections cannot be
    extracted from a PubDB.

    url         -- address of the PubDB (stored as self.url)
    Collections -- string naming the collections (stored as self.collections)

    The error banner is still printed when the exception is built, as
    before; in addition the text is now carried by the exception itself.
    """
    def __init__(self, url,Collections):
        message = '\nERROR extracting info for collections '+Collections+' from PubDB '+url+'.\n'
        # deriving from Exception makes the text visible in tracebacks
        # and lets generic 'except Exception' handlers see this error
        Exception.__init__(self, message)
        self.url = url
        self.collections = Collections
        print(message)
  
# ####################################
class RefDBmapError(Exception):
    """
    Error raised when the RefDB-PubDBs map cannot be accessed.

    url -- the address that failed; also stored as self.url

    The error banner is still printed when the exception is built, as
    before; in addition the text is now carried by the exception itself.
    """
    def __init__(self, url):
        message = '\nERROR accessing RefDB-PubDBs map at '+url+'\n'
        # deriving from Exception makes the text visible in tracebacks
        # and lets generic 'except Exception' handlers see this error
        Exception.__init__(self, message)
        self.url = url
        print(message)

# ####################################
class NoPHPError(Exception):
    """
    Error raised when a PubDB PHP script cannot be accessed.

    url -- the address of the script; also stored as self.url

    The error banner is still printed when the exception is built, as
    before; in addition the text is now carried by the exception itself.
    """
    def __init__(self, url):
        # The doubled blanks reproduce what the comma separated print
        # statement used to emit between its items.
        message = 'ERROR accessing PHP:  %s  \n' % (url,)
        # deriving from Exception makes the text visible in tracebacks
        # and lets generic 'except Exception' handlers see this error
        Exception.__init__(self, message)
        self.url = url
        print(message)
  
# ####################################
class pubDBResult:
    """
    Holder for the content obtained from one PubDB query.
    """
    def __init__(self, contents):
        # kept exactly as handed over by the caller
        self.contents = contents

    def dump(self):
        """
        Print the stored content on standard output.
        """
        # two blanks after the colon: the label ends with one and the
        # comma separated print statement used to add another
        print('Contents :  %s' % (self.contents,))

# ####################################
# class to access PubDBs
class PubDB:
    def __init__(self, owner, dataset, dataTiers, cfg_params):

#       Attributes
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers
        self.NeededdataTiers=[]
        self.cfg_params = cfg_params
    
        self.RefDBurl_ = 'http://cmsdoc.cern.ch/cms/production/www/'
        self.RefDBphp_ = 'PubDB/GetIdCollection.php'
        self.RefDBMotherphp_ = 'cgi/SQL/CollectionTree.php'

        self.PubDBCentralUrl_ = 'http://cmsdoc.cern.ch/cms/production/www/PubDB/'
        self.PubDBCentralPhp_ = 'GetPublishedCollectionInfoFromRefDB.php'

        self.PubDBAnalysisPhp_ = 'get-pubdb-analysisinfo.php'
        self.PubDBAnalysisPhpOld_ = 'get-pubdb-analysisinfo.php'
    
##      link to the modified RefDB-PubDBs map script that allow the display option
        self.RefDBPubDBsmapPhp_ = 'GetPublishedCollectionInfoFromRefDB.mod.php?display=1'

#       Costructor procedures

        CEBlackList = []
        try:
            tmpBad = string.split(self.cfg_params['EDG.ce_black_list'],',')
            #tmpBad = ['fnal']
            for tmp in tmpBad:
                tmp=string.strip(tmp)
                if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch              
                CEBlackList.append(tmp)
        except KeyError:
            pass
        common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
        self.reCEBlackList=[]
        for bad in CEBlackList:
            self.reCEBlackList.append(re.compile( bad ))
        #print 'ReBad: ',self.reCEBlackList


########################################################################
    def findAllCollections(self):
        """
        Contact RefDB and find the CollID of all the user required collections.

        Returns the list of collection IDs: the first collection reported by
        RefDB (the primary one) followed by the further collections whose
        data tier is among self.dataTiers.  The data tiers of the selected
        collections are appended to self.NeededdataTiers.

        Returns an empty list, after logging an error, if a requested data
        tier is not reported by RefDB.  Exits the process with status 10 if
        the RefDB query fails.
        """
        ## download from RefDB all the info about the given dataset-owner
        refdb=RefDBInfo(self.owner,self.dataset)
        try:
            collInfos=refdb.GetRefDBInfo()
        except :
            # NOTE(review): bare except kept on purpose, any failure of the
            # query ends the job; check what RefDBInfo can raise before
            # narrowing it.
            sys.exit(10)

        first=1
        NeededCollID=[]
        refdbdataTiers=[]
        for coll in collInfos:
            # coll[0] is the collection ID, coll[2] its data tier
            collid = coll[0]
            tier = coll[2]
            refdbdataTiers.append(tier)
            if first:
                ## select the primary collection
                NeededCollID.append(collid)
                self.NeededdataTiers.append(tier)
                common.logger.message("\n --> primary collection for owner "+self.owner+" is: ID="+collid+" DataTier="+tier)
                first=0
            elif self.dataTiers.count(tier):
                ## select only the parents collections corresponding to data-tiers requested by the user
                NeededCollID.append(collid)
                self.NeededdataTiers.append(tier)
                common.logger.message(" --> further collection required: ID="+collid+" DataTier="+tier)

        ## check that the user asks for Data Tier really existing in RefDB, otherwise give a warning message
        for dt in self.dataTiers:
            if dt not in refdbdataTiers:
                # build the text with str(): joining with commas would make
                # a tuple, and self.dataTiers is not a string
                msg = "ERROR: Data Tier ( => "+str(dt)+" <= ) not existing for dataset/owner "+self.dataset+"/"+self.owner+"! "
                msg = msg + 'Owner Dataset not published with asked dataTiers! '+\
                       self.owner+' '+ self.dataset+' '+str(self.dataTiers)
                msg = msg + ' Check the data_tier variable in crab.cfg !\n'
                common.logger.message(msg)
                return []

        return NeededCollID
  
########################################################################
    def findPubDBsbyCollID(self,CollID):
        """
        Find the list of PubDB URLs having a given Collection.

        CollID -- collection ID as a string (it is appended to the query)

        Returns the distinct strings found after 'PubDBURL=' in the answer
        of the central RefDB-PubDBs map.
        Raises RefDBmapError if the map cannot be opened.
        """
        ### contact the RefDB-PubDBs map to discover where the given CollID is
        url = self.PubDBCentralUrl_+self.RefDBPubDBsmapPhp_+'&CollID=' + CollID
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise RefDBmapError(url)

        ### search for the PubDBURL string
        reURLLine=re.compile( r'PubDBURL=(\S*)' )

        PubDBURLs = []
        try:
            for line in f.readlines():
                found = reURLLine.search(line)
                if found:
                    # Only the text between the first and the second '=' is
                    # kept, so a URL carrying a query string is cut there.
                    # NOTE(review): findPubDBs compares the URLs obtained
                    # for different collections, so the cut looks deliberate;
                    # confirm before switching to found.group(1).
                    PubDBURLs.append(string.split(found.group(),'=')[1])
        finally:
            # always release the connection to the central server
            f.close()

        ### return the list of PubDBURL where the collection is present
        return  self.uniquelist(PubDBURLs)
  
################################################################
    def findPubDBs(self,CollIDs):
        """
         Return the PubDB URLs that publish every collection in CollIDs
         and pass the CE black list check.
        """
        ### gather the PubDB URLs of each collection, counting the collections
        gathered=[]
        nColl=0
        for oneID in CollIDs:
            nColl+=1
            gathered.extend(self.findPubDBsbyCollID(oneID))

        ### a PubDB publishing all the collections shows up once per collection
        complete=[u for u in self.uniquelist(gathered)
                  if gathered.count(u)==nColl]

        ### keep only the PubDBs that are not in the CE black list
        return self.checkBlackList(complete)

#######################################################################
    def uniquelist(self, old):
        """
         Remove duplicates from a list.

         old -- iterable of hashable elements

         Returns a new list holding each distinct element once, in the
         order of its first appearance, so the result is reproducible.
        """
        seen={}
        unique=[]
        for e in old:
            if e not in seen:
                seen[e]=1
                unique.append(e)
        return unique
 
#######################################################################
    def checkBlackList(self, pubDBUrls):
        """
        Select the PubDB URLs of sites not excluded by the user (via CE black list).

        pubDBUrls -- URL strings to be filtered

        Returns the list, in input order, of the URLs in which none of the
        compiled patterns of self.reCEBlackList is found.
        """
        goodurls = []
        for url in pubDBUrls:
            common.logger.debug(10,'connecting to the URL '+url)
            banned=0
            # the loop variable is not called 're', which would hide the module
            for pattern in self.reCEBlackList:
                if pattern.search(url):
                    common.logger.message('CE in black list, skipping PubDB URL '+url)
                    banned=1
                    # one match is enough: report the URL only once
                    break
            if not banned:
                goodurls.append(url)
        if len(goodurls) == 0:
            common.logger.debug(3,"No selected PubDB URLs")
        return goodurls

########################################################################
    def checkPubDBNewVersion(self, baseurl):
        """
        Check PubDB version to find out if it's new-style or old-style
        """
### check based on the existance of pubdb-get-version.php
        urlversion=baseurl+'pubdb-get-version.php'
        newversion=1;
        try:
         v = urllib2.urlopen(urlversion)
        except urllib2.URLError, msg:
          #print "WARNING: no URL to get PubDB version "
          newversion=0;
      
        if (newversion) :
         schemaversion = v.read()
         #print schemaversion;
   
        return newversion 

########################################################################
    def getPubDBData(self, CollIDs, url , newversion):
        """
         Contact a PubDB to collect all the relevant information
        """
        result = []
        
### get the base PubDb url 
        end=string.rfind(url,'/')
        lastEq=string.rfind(url,'=')

        if (newversion) :
### from PubDB V4 : get info for all the collections in one shot and unserialize the content
           Collections=string.join(CollIDs,'-')
           ## add the PU among the required Collections if the Digi are requested
           # ( for the time being asking it directly to the PubDB so the RefDB
           # level data discovery is bypassed..... in future when every site
           # will have the new style it will be possible to ask for PU , at RefDB level, in method findAllCollections ) 
           if ( self.NeededdataTiers.count('Digi') ):
             PUCollID=self.getDatatierCollID(url[:end+1],Collections,"PU")
             if (PUCollID) : CollIDs.append(PUCollID)
           ##
           Collections=string.join(CollIDs,'-')
           ### download from PubDB all the info about the given collections
           pubdb_analysis=PubDBInfo(url[:end+1],Collections)
           #print pubdb_analysis.GetPubDBInfo()
           ok=0
           try:
             catInfos=pubdb_analysis.GetPubDBInfo()
             ok=1
           except :
             #print "WARNING: can't get PubDB content out of "+url[:end+1]+"\n"
             print '\nERROR extracting info for collections '+Collections+' from PubDB '+url[:end+1]+'. The PU might not be published at that site.\n'
             #raise PubDBGetAnalysisError(url[:end+1],Collections)   
           if (ok): result=catInfos;

        else:

### before PubDB V4 : get info for each collection and read the key-value pair text
              
          for CollID in CollIDs:
            urlphp=url[:end+1]+self.PubDBAnalysisPhp_+'?CollID='+CollID
            # print 'PHP URL: '+urlphp+' \n'

            reOld=re.compile( r'V24' )
            #print urlphp,'Old PubDB ',reOld.search(urlphp)
            if reOld.search(urlphp):
                raise NoPHPError(urlphp)
            else:
                try:
                    f = urllib2.urlopen(urlphp) 
                except urllib2.URLError, msg:
                    print "WARNING: ", msg 
                    raise PubDBError(urlphp)
                except urllib2.HTTPError, msg:
                    print "WARNING: ", msg
                    raise NoPHPError(urlphp)
                content = f.read()
                result.append(pubDBResult(content))
                #print "Coll",CollID," content ",content
                pass
            pass
        
        #print '.....'
        #for r in result:
        #     r.dump()
        #print '.....'
        return result

########################################################################
    def getDatatierCollID(self,urlbase,CollIDString,datatier):
        """
        Contact a script of PubDB to retrieve the collid of a DataTier.

        urlbase      -- PubDB base URL; the script name is appended to it
        CollIDString -- string sent to the script as 'collid'
        datatier     -- data tier name sent to the script as 'datatier'

        Returns the text found after 'collid=' in the answer of the script,
        or None if the answer holds no such field.
        Raises PubDBGetAnalysisError if the script cannot be opened.
        """
        script=urlbase+'pubdb-get-collidbydatatier.php'
        try:
          f = urllib.urlopen(script+'?collid='+CollIDString+"&datatier="+datatier)
        except IOError:
          # report the script that failed, using the names defined here
          raise PubDBGetAnalysisError(script,CollIDString)
        try:
          data = f.read()
        finally:
          # always release the connection
          f.close()

        colldata=re.compile(r'collid=(\S*)').search(data)
        if not colldata:
           return None

        datatier_CollID=colldata.group(1)
        common.logger.message(" --> asking to PubDB "+urlbase+" for an additional collection : ID= "+datatier_CollID+" DataTier= "+datatier)
        return datatier_CollID
 
########################################################################
    def getAllPubDBData(self):
        """
         Contact a list of PubDB to collect all the relevant information
        """
        newPubDBResult=[]
        oldPubDBResult=[]
        Result={}

### find the user-required collection IDs 
        CollIDs = self.findAllCollections()
### find the PubDB URLs publishing the needed data 
        urllist = self.findPubDBs(CollIDs)
### collect information sparatelly from new-style PubDBs and old-style PubDBs
        for pubdburl in urllist: 
            end=string.rfind(pubdburl,'/')
            newversion=self.checkPubDBNewVersion(pubdburl[:end+1])
            if (newversion):
              res=self.getPubDBData(CollIDs,pubdburl,newversion)
              if len(res)>0:
               newPubDBResult.append(res)
            else:
              resold=self.getPubDBData(CollIDs,pubdburl,newversion)
              if len(resold)>0:
               oldPubDBResult.append(resold)
### fill a dictionary with all the PubBDs results both old-style and new-style
        Result['newPubDB']=newPubDBResult
        Result['oldPubDB']=oldPubDBResult

        ## print for debugging purpose
        #
        #for PubDBversion in Result.keys():
            #print ("key %s, val %s" %(PubDBversion,Result[PubDBversion]))
        #    if len(Result[PubDBversion])>0 :
               #print (" key %s"%(PubDBversion)) 
        #       for result in Result[PubDBversion]:
        #          for r in result:
                      #r.dump()
        #              common.log.write('----------------- \n')
              #print '.....................................'

        return Result

####################################################################
Revision:	1.6
Committed:	Tue Aug 23 11:14:24 2005 UTC (19 years, 8 months ago) by slacapra
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	CRAB_1_0_0_beta4, CRAB_1_0_0_pre1_boss_2, CRAB_1_0_0_pre1_boss, CRAB_1_0_0_pre3, CRAB_1_0_0_pre2
Changes since 1.5:	+1 -0 lines
Log Message:	add PubDBInfo
#	Content
1	#!/usr/bin/env python2
2	import sys, os, string, re
3	import urllib, urllister
4	import urllib2
5	import common
6	from RefDBInfo import RefDBInfo
7	from PubDBInfo import *
8
9	# ####################################
10	class PubDBError:
11	def __init__(self, url):
12	print '\nERROR accessing PubDB at '+url+'\n'
13	pass
14
15	# ####################################
16	class PubDBGetAnalysisError:
17	def __init__(self, url,Collections):
18	print '\nERROR extracting info for collections '+Collections+' from PubDB '+url+'.\n'
19	pass
20
21	# ####################################
22	class RefDBmapError:
23	def __init__(self, url):
24	print '\nERROR accessing RefDB-PubDBs map at '+url+'\n'
25	pass
26
27	# ####################################
28	class NoPHPError:
29	def __init__(self, url):
30	#print '\nERROR accessing PHP at '+url+' \n'
31	print 'ERROR accessing PHP: ',url,' \n'
32	pass
33
34	# ####################################
35	class pubDBResult:
36	def __init__(self,
37	contents):
38	self.contents=contents
39
40
41	def dump(self):
42	print 'Contents : ',self.contents
43	pass
44
45	# ####################################
46	# class to access PubDBs
47	class PubDB:
48	def __init__(self, owner, dataset, dataTiers, cfg_params):
49
50	# Attributes
51	self.owner = owner
52	self.dataset = dataset
53	self.dataTiers = dataTiers
54	self.NeededdataTiers=[]
55	self.cfg_params = cfg_params
56
57	self.RefDBurl_ = 'http://cmsdoc.cern.ch/cms/production/www/'
58	self.RefDBphp_ = 'PubDB/GetIdCollection.php'
59	self.RefDBMotherphp_ = 'cgi/SQL/CollectionTree.php'
60
61	self.PubDBCentralUrl_ = 'http://cmsdoc.cern.ch/cms/production/www/PubDB/'
62	self.PubDBCentralPhp_ = 'GetPublishedCollectionInfoFromRefDB.php'
63
64	self.PubDBAnalysisPhp_ = 'get-pubdb-analysisinfo.php'
65	self.PubDBAnalysisPhpOld_ = 'get-pubdb-analysisinfo.php'
66
67	## link to the modified RefDB-PubDBs map script that allow the display option
68	self.RefDBPubDBsmapPhp_ = 'GetPublishedCollectionInfoFromRefDB.mod.php?display=1'
69
70	# Costructor procedures
71
72	CEBlackList = []
73	try:
74	tmpBad = string.split(self.cfg_params['EDG.ce_black_list'],',')
75	#tmpBad = ['fnal']
76	for tmp in tmpBad:
77	tmp=string.strip(tmp)
78	if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch
79	CEBlackList.append(tmp)
80	except KeyError:
81	pass
82	common.logger.debug(5,'CEBlackList: '+str(CEBlackList))
83	self.reCEBlackList=[]
84	for bad in CEBlackList:
85	self.reCEBlackList.append(re.compile( bad ))
86	#print 'ReBad: ',self.reCEBlackList
87
88
89	########################################################################
90	def findAllCollections(self):
91	"""
92	Contact RefDB and find the CollID of all the user required collections
93	"""
94	## download from RefDB all the info about the given dataset-owner
95	refdb=RefDBInfo(self.owner,self.dataset)
96	#print refdb.GetRefDBInfo()
97	try:
98	collInfos=refdb.GetRefDBInfo()
99	except :
100	sys.exit(10)
101	#print "collInfos=", collInfos
102
103	first=1
104	NeededCollID=[]
105	refdbdataTiers=[]
106	for coll in collInfos:
107	## select the primary collection
108	if first:
109	NeededCollID.append(coll[0])
110	self.NeededdataTiers.append(coll[2])
111	refdbdataTiers.append(coll[2])
112	common.logger.message("\n --> primary collection for owner "+self.owner+" is: ID="+coll[0]+" DataTier="+coll[2])
113	first=0
114	else:
115	## select only the parents collections corresponding to data-tiers requested by the user
116	if self.dataTiers.count(coll[2]):
117	NeededCollID.append(coll[0])
118	self.NeededdataTiers.append(coll[2])
119	common.logger.message(" --> further collection required: ID="+coll[0]+" DataTier="+coll[2])
120	refdbdataTiers.append(coll[2])
121
122	## check that the user asks for Data Tier really existing in RefDB, otherwise give a warning message
123	for dt in self.dataTiers:
124	if refdbdataTiers.count(dt)<=0:
125	msg = "ERROR: Data Tier ( =>",dt,"<= ) not existing for dataset/owner "+self.dataset+"/"+self.owner+"! "
126	msg = str(msg) + 'Owner Dataset not published with asked dataTiers! '+\
127	self.owner+' '+ self.dataset+' '+self.dataTiers
128	msg = str(msg) + ' Check the data_tier variable in crab.cfg !\n'
129	common.logger.message(msg)
130	return []
131
132	#print 'Needed Collections are ', NeededCollID
133	#return collInfos
134	#print "NeededCollID= ", NeededCollID
135	return NeededCollID
136
137	########################################################################
138	def findPubDBsbyCollID(self,CollID):
139	"""
140	Find the list of PubDB URLs having a given Collection
141	"""
142	### contact the RefDB-PubDBs map to discovery where the given CollID is
143	url = self.PubDBCentralUrl_+self.RefDBPubDBsmapPhp_+'&CollID=' + CollID
144	# print "%s"%(url)
145	try:
146	f = urllib.urlopen(url)
147	except IOError:
148	# print 'Cannot access URL: '+url
149	raise RefDBmapError(url)
150
151	### search for the PubDBURL string
152	reURLLine=re.compile( r'PubDBURL=(\S*)' )
153
154	PubDBURLs = []
155	for line in f.readlines():
156	#print '#',line,'#'
157	if reURLLine.search(line) :
158	URLLine=reURLLine.search(line).group()
159	#print string.split(URLLine,'=')[1]
160	PubDBURLs.append(string.split(URLLine,'=')[1])
161
162	### return the list of PubDBURL where the collection is present
163	#return PubDBURLs
164	return self.uniquelist(PubDBURLs)
165
166	################################################################
167	def findPubDBs(self,CollIDs):
168	"""
169	Find the list of PubDB URLs having ALL the required collections
170	"""
171	### loop over all the required collections
172	#pubdbmap={}
173	allurls=[]
174	countColl=0
175	for CollID in CollIDs :
176	countColl=countColl+1
177	### map the CollectionID with the list of PubDB URLs
178	#pubdbmap[CollID]=self.findPubDBsbyCollID(CollID)
179	### prepare a list all PubDB urls for all collections
180	allurls.extend(self.findPubDBsbyCollID(CollID))
181	#print pubdbmap.values()
182
183	### select only PubDB urls that contains all the collections
184	unique_urls=self.uniquelist(allurls)
185	SelectedPubDBURLs=[]
186	# loop on a unique list of PubDB urls
187	for url in unique_urls :
188	# check that PubDBurl occurrance is the same as the number of collections
189	if ( allurls.count(url)==countColl ) :
190	SelectedPubDBURLs.append(url)
191
192	#print 'Required Collections',CollIDs,'are all present in PubDBURLs : ',SelectedPubDBURLs,'\n'
193	#### check based on CE black list: select only PubDB not in the CE black list
194	GoodPubDBURLs=self.checkBlackList(SelectedPubDBURLs)
195	return GoodPubDBURLs
196
197	#######################################################################
198	def uniquelist(self, old):
199	"""
200	remove duplicates from a list
201	"""
202	nd={}
203	for e in old:
204	nd[e]=0
205	return nd.keys()
206
207	#######################################################################
208	def checkBlackList(self, pubDBUrls):
209	"""
210	select PubDB URLs that are at site not exluded by the user (via CE black list)
211	"""
212	goodurls = []
213	for url in pubDBUrls:
214	common.logger.debug(10,'connecting to the URL '+url)
215	good=1
216	for re in self.reCEBlackList:
217	if re.search(url):
218	common.logger.message('CE in black list, skipping PubDB URL '+url)
219	good=0
220	pass
221	if good: goodurls.append(url)
222	if len(goodurls) == 0:
223	common.logger.debug(3,"No selected PubDB URLs")
224	return goodurls
225
226	########################################################################
227	def checkPubDBNewVersion(self, baseurl):
228	"""
229	Check PubDB version to find out if it's new-style or old-style
230	"""
231	### check based on the existance of pubdb-get-version.php
232	urlversion=baseurl+'pubdb-get-version.php'
233	newversion=1;
234	try:
235	v = urllib2.urlopen(urlversion)
236	except urllib2.URLError, msg:
237	#print "WARNING: no URL to get PubDB version "
238	newversion=0;
239
240	if (newversion) :
241	schemaversion = v.read()
242	#print schemaversion;
243
244	return newversion
245
246	########################################################################
247	def getPubDBData(self, CollIDs, url , newversion):
248	"""
249	Contact a PubDB to collect all the relevant information
250	"""
251	result = []
252
253	### get the base PubDb url
254	end=string.rfind(url,'/')
255	lastEq=string.rfind(url,'=')
256
257	if (newversion) :
258	### from PubDB V4 : get info for all the collections in one shot and unserialize the content
259	Collections=string.join(CollIDs,'-')
260	## add the PU among the required Collections if the Digi are requested
261	# ( for the time being asking it directly to the PubDB so the RefDB
262	# level data discovery is bypassed..... in future when every site
263	# will have the new style it will be possible to ask for PU , at RefDB level, in method findAllCollections )
264	if ( self.NeededdataTiers.count('Digi') ):
265	PUCollID=self.getDatatierCollID(url[:end+1],Collections,"PU")
266	if (PUCollID) : CollIDs.append(PUCollID)
267	##
268	Collections=string.join(CollIDs,'-')
269	### download from PubDB all the info about the given collections
270	pubdb_analysis=PubDBInfo(url[:end+1],Collections)
271	#print pubdb_analysis.GetPubDBInfo()
272	ok=0
273	try:
274	catInfos=pubdb_analysis.GetPubDBInfo()
275	ok=1
276	except :
277	#print "WARNING: can't get PubDB content out of "+url[:end+1]+"\n"
278	print '\nERROR extracting info for collections '+Collections+' from PubDB '+url[:end+1]+'. The PU might not be published at that site.\n'
279	#raise PubDBGetAnalysisError(url[:end+1],Collections)
280	if (ok): result=catInfos;
281
282	else:
283
284	### before PubDB V4 : get info for each collection and read the key-value pair text
285
286	for CollID in CollIDs:
287	urlphp=url[:end+1]+self.PubDBAnalysisPhp_+'?CollID='+CollID
288	# print 'PHP URL: '+urlphp+' \n'
289
290	reOld=re.compile( r'V24' )
291	#print urlphp,'Old PubDB ',reOld.search(urlphp)
292	if reOld.search(urlphp):
293	raise NoPHPError(urlphp)
294	else:
295	try:
296	f = urllib2.urlopen(urlphp)
297	except urllib2.URLError, msg:
298	print "WARNING: ", msg
299	raise PubDBError(urlphp)
300	except urllib2.HTTPError, msg:
301	print "WARNING: ", msg
302	raise NoPHPError(urlphp)
303	content = f.read()
304	result.append(pubDBResult(content))
305	#print "Coll",CollID," content ",content
306	pass
307	pass
308
309	#print '.....'
310	#for r in result:
311	# r.dump()
312	#print '.....'
313	return result
314
315	########################################################################
316	def getDatatierCollID(self,urlbase,CollIDString,datatier):
317	"""
318	Contact a script of PubDB to retrieve the collid a DataTier
319	"""
320	try:
321	f = urllib.urlopen(urlbase+'pubdb-get-collidbydatatier.php?collid='+CollIDString+"&datatier="+datatier)
322	except IOError:
323	raise PubDBGetAnalysisError(url[:end+1]+'pubdb-get-collidbydatatier.php',CollIDString)
324	data = f.read()
325	colldata=re.compile(r'collid=(\S*)').search(data);
326	if colldata:
327	datatier_CollID=colldata.group(1)
328	# print " --> asking to PubDB "+urlbase+" for an additional collection : ID= "+datatier_CollID+" DataTier= "+datatier
329	common.logger.message(" --> asking to PubDB "+urlbase+" for an additional collection : ID= "+datatier_CollID+" DataTier= "+datatier)
330
331	return datatier_CollID
332
333	########################################################################
334	def getAllPubDBData(self):
335	"""
336	Contact a list of PubDB to collect all the relevant information
337	"""
338	newPubDBResult=[]
339	oldPubDBResult=[]
340	Result={}
341
342	### find the user-required collection IDs
343	CollIDs = self.findAllCollections()
344	### find the PubDB URLs publishing the needed data
345	urllist = self.findPubDBs(CollIDs)
346	### collect information sparatelly from new-style PubDBs and old-style PubDBs
347	for pubdburl in urllist:
348	end=string.rfind(pubdburl,'/')
349	newversion=self.checkPubDBNewVersion(pubdburl[:end+1])
350	if (newversion):
351	res=self.getPubDBData(CollIDs,pubdburl,newversion)
352	if len(res)>0:
353	newPubDBResult.append(res)
354	else:
355	resold=self.getPubDBData(CollIDs,pubdburl,newversion)
356	if len(resold)>0:
357	oldPubDBResult.append(resold)
358	### fill a dictionary with all the PubBDs results both old-style and new-style
359	Result['newPubDB']=newPubDBResult
360	Result['oldPubDB']=oldPubDBResult
361
362	## print for debugging purpose
363	#
364	#for PubDBversion in Result.keys():
365	#print ("key %s, val %s" %(PubDBversion,Result[PubDBversion]))
366	# if len(Result[PubDBversion])>0 :
367	#print (" key %s"%(PubDBversion))
368	# for result in Result[PubDBversion]:
369	# for r in result:
370	#r.dump()
371	# common.log.write('----------------- \n')
372	#print '.....................................'
373
374	return Result
375
376	####################################################################