ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PubDBInfo.py
Revision: 1.5
Committed: Thu Dec 15 13:26:23 2005 UTC (19 years, 4 months ago) by afanfani
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_3_0_pre5, CRAB_1_3_0_pre4, HEAD_20092006, CRAB_1_2_1, CRAB_1_2_0, CRAB_1_2_0_pre9, CRAB_1_2_0_pre8, CRAB_1_2_0_pre7, post_cmssw_integration_20060527, pre_cmssw_integration_20060527, CRAB_1_1_0, CRAB_1_1_0_pre4, CRAB_1_1_0_pre3, CRAB_1_1_0_pre1, CRAB_1_0_7, CRAB_1_0_7_pre1, CRAB_1_0_6, CRAB_1_0_5, CRAB_1_0_4
Branch point for: CRAB_BOSS4_v1, CRAB_BOSS4
Changes since 1.4: +1 -0 lines
Log Message:
add a check that the catalogue data structure extracted from PubDB
is really there (= fix to cope with the somewhat unusual PHP serialized data structure in the Perugia PubDB)

File Contents

# User Rev Content
1 slacapra 1.1 #!/usr/bin/env python
2     import sys, os, string, re
3     import urllib, urllister
4     import urllib2
5     from UnserializePHP import *
6 slacapra 1.2 from orcarcBuilder import *
7 slacapra 1.1
class PubDBInfoError(Exception):
    """Raised when the PubDB analysis interface cannot be accessed.

    Deriving from ``Exception`` lets generic ``except Exception`` handlers
    catch it and gives the error a readable message.  The error banner is
    still printed on construction, exactly as before.

    Parameters:
        Collections -- the collection id string that was being requested.
    """

    def __init__(self, Collections):
        message = '\nERROR accessing PubDB for Collections: '+Collections+'\n'
        Exception.__init__(self, message)
        self.Collections = Collections
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(message)
class PubDBInfoNoCollectionError(Exception):
    """Raised when the requested collections are not found in a PubDB.

    Deriving from ``Exception`` lets generic ``except Exception`` handlers
    catch it and gives the error a readable message.  The error banner is
    still printed on construction, exactly as before.

    Parameters:
        Collections -- the collection id string that was being requested.
        url         -- the PubDB url that was queried.
    """

    def __init__(self, Collections, url):
        message = '\nERROR No Collections '+Collections+' found in PubDB '+url
        Exception.__init__(self, message)
        self.Collections = Collections
        self.url = url
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(message)
class NoPHPError(Exception):
    """Raised when the PHP page at ``url`` is not the expected (updated) version.

    Deriving from ``Exception`` lets generic ``except Exception`` handlers
    catch it and gives the error a readable message.  The error banner is
    still printed on construction.

    Parameters:
        url -- the url of the PHP page that was contacted.
    """

    def __init__(self, url):
        # The original multi-argument print statement separated its three
        # items with single spaces; build the same text explicitly so the
        # output is identical on Python 2 and 3.
        message = 'ERROR accessing PHP: ' + ' ' + str(url) + ' ' + 'isn\'t updated version \n'
        Exception.__init__(self, message)
        self.url = url
        print(message)
class PubDBInfoResult:
    """Simple container exposing the given ``contents`` as an attribute."""

    def __init__(self, contents):
        # The value is stored untouched; no copy or validation is done.
        self.contents = contents
25    
26     ################################################################################
27     # Class to connect to PubDB interface for the analysis and download the data in one shot using the serialized PHP data.
28     ################################################################################
class PubDBInfo:
    """Client for the PubDB analysis interface.

    Downloads in one shot the PHP-serialized catalogue description of the
    requested collections, then keeps a single catalogue per collection and
    file type, chosen by access-protocol priority.
    """

    # Patterns extracting the numeric fields from a run-range string.
    # Compiled once here instead of on every loop iteration.
    _reTotalEvents = re.compile(r'TotalEvents=(\d*)')
    _reFirstRun = re.compile(r'FirstRun=(\d*)')
    _reLastRun = re.compile(r'LastRun=(\d*)')

    def __init__(self, pubdburl, Collections):
        """
        pubdburl    -- base url of the PubDB (the PHP page name is appended to it)
        Collections -- collection id string; the part before the first '-'
                       is taken as the primary collection id
        """
        self.Collections = Collections
        self.PrimaryCollID = Collections.split('-')[0]
        self.PubDBurl_ = pubdburl
        self.PubDBInfophp_ = 'pubdb-get-analysisinfo.php'
        # Access protocols, most preferred first.
        self.protocolPrio_ = ['http', 'rfio', 'mysql', 'gridftp']

    ##########################################################################
    def GetPubDBInfo(self):
        """
        Get all the catalogues-related info from PubDB and select the best ones if multiple choices are possible

        Returns the list produced by SelectBestPubDBInfo.
        """
        ### extract catalogues related info from pubDB
        cataloguecoll_map = self.ExtractPubDBInfo()
        ### select the best catalogues
        return self.SelectBestPubDBInfo(cataloguecoll_map)

    #########################################################################
    def ExtractPubDBInfo(self):
        """
        Extract all the information from the PubDB analysis interface

        Returns a dictionary mapping each CollectionId to the list of
        catalogue entries (catalogEntryNew objects) found for it.

        Raises PubDBInfoError if the url cannot be opened,
        PubDBInfoNoCollectionError if the reply starts with '<', and
        PHPUnserializeError if an IOError occurs while decoding the reply.
        """
        url = self.PubDBurl_ + self.PubDBInfophp_
        try:
            f = urllib.urlopen(url + '?collid=' + self.Collections)
        except IOError:
            raise PubDBInfoError(self.Collections)

        # Always release the connection, even if reading fails.
        try:
            data = f.read()
        finally:
            f.close()

        # A reply starting with '<' is reported as "collection not found"
        # (presumably a markup page rather than serialized PHP data).
        if data[:1] == '<':
            raise PubDBInfoNoCollectionError(self.Collections, url)

        try:
            catalogues = PHPUnserialize().unserialize(data)
        except IOError:
            raise PHPUnserializeError(data)

        try:
            collmap = self._buildCollectionMap(catalogues)
        except IOError:
            raise PHPUnserializeError(data)

        return collmap

    def _buildCollectionMap(self, catalogues):
        """
        Walk the unserialized PubDB structure and build {CollectionId: [catalogue entries]}
        """
        collmap = {}
        for k in catalogues.keys():
            collection = catalogues[k]
            CollId = collection['CollectionId']
            ## get also the collection type
            CollType = collection['CollectionType']
            ## set primary collection flag
            if CollId == self.PrimaryCollID:
                PrimaryCollFlag = 1
            else:
                PrimaryCollFlag = 0
            colllist = []

            cat = collection['Catalogue']
            for kcat in cat.keys():
                catalogue = cat[kcat]
                # The catalogue slot can be empty in the serialized
                # structure of some PubDBs; skip it in that case.
                if not catalogue:
                    continue
                ContactString = catalogue['ContactString']
                ContactProtocol = catalogue['ContactProtocol']
                CatalogueType = catalogue['CatalogueType']
                ValidationStatus = catalogue['ValidationStatus']
                ce = catalogue['CEs']
                CElist = [ce[kce] for kce in ce.keys()]

                cc = catalogue['CatalogueContents']
                for kcc in cc.keys():
                    content = cc[kcc]
                    FileType = content['FileType']
                    SE = content['SE']
                    # Start from the "no variables" value so that an empty
                    # Variables dictionary cannot leave the name unbound or
                    # carrying the value of a previous catalogue.
                    Variables = ''
                    if content['Variables'] is not None:
                        # NOTE(review): only the last key=value pair is
                        # kept, as in the original code.
                        for kvar in content['Variables'].keys():
                            Variables = kvar + "=" + content['Variables'][kvar]

                    run = content['RunRange']
                    # NOTE(review): only the last run range is retained, and
                    # an empty RunRange leaves these names unset (or stale);
                    # kept as-is because no "empty" value is defined.
                    for krun in run.keys():
                        TotalEvents = self._reTotalEvents.search(run[krun]).group(1)
                        FirstRun = self._reFirstRun.search(run[krun]).group(1)
                        LastRun = self._reLastRun.search(run[krun]).group(1)

                    ## fill a catalogue entry, storing collection type and primary collection flag
                    # NOTE(review): indentation was lost in the source this
                    # was recovered from; one entry per CatalogueContents
                    # item is assumed here -- confirm against the repository.
                    acatalogue = catalogEntryNew(CollType, PrimaryCollFlag, FileType, ValidationStatus, ContactString, ContactProtocol, CatalogueType, SE, CElist, TotalEvents, FirstRun+'-'+LastRun, Variables)

                    ## list the catalogues belonging to a given collection
                    colllist.append(acatalogue)

            ## dictionary grouping catalogues by CollectionID
            collmap[CollId] = colllist

        return collmap

    ########################################################################
    def SelectBestPubDBInfo(self, cataloglist):
        """
        Select the lists of needed catalogues (from a set of catalogues referring to the same collection and FileType, select the best one based on protocol)

        cataloglist -- dictionary {collection id: [catalogues]} where each
                       catalogue has FileType and ContactProtocol attributes.

        Returns the minimal list of needed catalogues: one per collection
        and FileType, with FileTypes in first-seen order within a collection.
        """
        selectcatalogues = []

        ### for each collection
        for collid in cataloglist.keys():
            ### group the catalogues by FileType in a single pass
            filetypes = []
            ftmap = {}
            for catalog in cataloglist[collid]:
                if catalog.FileType not in ftmap:
                    filetypes.append(catalog.FileType)
                    ftmap[catalog.FileType] = []
                ftmap[catalog.FileType].append(catalog)
            ### select only one catalogue among those with the same FileType
            for ft in filetypes:
                selectcatalogues.append(self.SelectBestCatalog(ftmap[ft]))

        ### return the minimal list of needed catalogues
        return selectcatalogues

    ####################################################
    def SelectBestCatalog(self, ftcat):
        """
        From a set of catalogues with the same FileType and different access protocol, select the one according to access protocols

        Returns the first catalogue whose ContactProtocol matches the
        highest-priority protocol in self.protocolPrio_.  If none of the
        catalogues uses a known protocol the first catalogue is returned
        (consistent with the single-catalogue case, which never checks the
        protocol).  Returns None only for an empty input list.
        """
        if not ftcat:
            return None

        ### if just one catalog, just return it!
        if len(ftcat) == 1:
            return ftcat[0]

        ### otherwise select the best catalogue based on protocol
        for prot in self.protocolPrio_:
            for cat in ftcat:
                if cat.ContactProtocol == prot:
                    return cat

        ### no known protocol: fall back to the first catalogue instead of None
        return ftcat[0]
200