ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PubDBInfo.py
Revision: 1.3
Committed: Thu Oct 13 13:01:57 2005 UTC (19 years, 6 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_0_2, CRAB_0_2_2, CRAB_1_0_1, CRAB_1_0_0_rc1, CRAB_1_0_0_beta4
Changes since 1.2: +10 -3 lines
Log Message:
mods by AleF to fix ordering of catalogs

File Contents

# Content
1 #!/usr/bin/env python
2 import sys, os, string, re
3 import urllib, urllister
4 import urllib2
5 from UnserializePHP import *
6 from orcarcBuilder import *
7
class PubDBInfoError(Exception):
    """Error raised when the PubDB interface cannot be accessed.

    Creating an instance prints an error banner naming the requested
    collections; the same text (without the surrounding newlines) is
    stored as the exception message.

    Parameters:
        Collections: string identifying the requested collections.
    """

    def __init__(self, Collections):
        message = 'ERROR accessing PubDB for Collections: ' + Collections
        Exception.__init__(self, message)
        # Single-argument print(...) behaves the same as a statement and
        # as a function call.
        print('\n' + message + '\n')
class PubDBInfoNoCollectionError(Exception):
    """Error raised when PubDB reports no match for the collections.

    Creating an instance prints an error banner naming the requested
    collections; the same text (without the surrounding newlines) is
    stored as the exception message.

    Parameters:
        Collections: string identifying the requested collections.
    """

    def __init__(self, Collections):
        message = 'ERROR No Collections found in PubDB : ' + Collections
        Exception.__init__(self, message)
        # Single-argument print(...) behaves the same as a statement and
        # as a function call.
        print('\n' + message + '\n')
class NoPHPError(Exception):
    """Error signalling that the PHP page at a URL is not the updated version.

    Creating an instance prints an error line naming the URL; the same
    text (without the trailing newline) is stored as the exception message.

    Parameters:
        url: the address of the PHP page; it is formatted with ``%s``, so
            any printable object is accepted.
    """

    def __init__(self, url):
        # The two spaces after the colon are deliberate: they reproduce
        # the output of the comma-separated print statement
        #   print 'ERROR accessing PHP: ', url, '...'
        # which inserts a separator after an item already ending in a space.
        message = "ERROR accessing PHP:  %s isn't updated version " % (url,)
        Exception.__init__(self, message)
        print(message + '\n')
class PubDBInfoResult:
    """Plain holder that keeps whatever is passed in as ``contents``."""

    def __init__(self, contents):
        # Stored as-is: no copy and no validation.
        self.contents = contents
25
26 ################################################################################
27 # Class to connect to PubDB interface for the analysis and download the data in one shot using the serialized PHP data.
28 ################################################################################
class PubDBInfo:
    """Query the PubDB analysis interface and pick the catalogues to use.

    The serialized PHP reply of ``pubdb-get-analysisinfo.php`` is downloaded
    in one request, turned into catalogue entries grouped by collection, and
    reduced to one catalogue per (collection, FileType) pair.

    Attributes set by the constructor:
        Collections: the collection string given by the caller.
        PrimaryCollID: the part of ``Collections`` before the first '-'.
        PubDBurl_: base URL of the PubDB instance.
        PubDBInfophp_: name of the PHP script appended to ``PubDBurl_``.
        protocolPrio_: contact protocols, most preferred first.
    """

    # Patterns for the fields embedded in each run-range string; compiled
    # once here instead of once per run-range entry.
    _reTotalEvents = re.compile(r'TotalEvents=(\d*)')
    _reFirstRun = re.compile(r'FirstRun=(\d*)')
    _reLastRun = re.compile(r'LastRun=(\d*)')

    def __init__(self, pubdburl, Collections):
        self.Collections = Collections
        self.PrimaryCollID = Collections.split('-')[0]
        self.PubDBurl_ = pubdburl
        self.PubDBInfophp_ = 'pubdb-get-analysisinfo.php'
        self.protocolPrio_ = ['http', 'rfio', 'mysql', 'gridftp']

    ##########################################################################
    def GetPubDBInfo(self):
        """
        Get all the catalogue-related info from PubDB and select the best
        catalogues when multiple choices are possible.

        Returns the list produced by SelectBestPubDBInfo().
        """
        ### extract catalogue-related info from PubDB
        cataloguecoll_map = self.ExtractPubDBInfo()
        ### select the best catalogues
        return self.SelectBestPubDBInfo(cataloguecoll_map)

    #########################################################################
    def ExtractPubDBInfo(self):
        """
        Extract all the information from the PubDB analysis interface.

        Returns a dictionary mapping each CollectionId found in the reply to
        the list of catalogue entries (built with catalogEntryNew) that
        belong to it.

        Raises PubDBInfoError if the URL cannot be opened,
        PubDBInfoNoCollectionError if the reply starts with '<', and
        PHPUnserializeError if an IOError occurs while decoding the reply.
        """
        url = self.PubDBurl_ + self.PubDBInfophp_ + '?collid=' + self.Collections
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise PubDBInfoError(self.Collections)

        # Always release the connection, even if reading fails.
        try:
            data = f.read()
        finally:
            f.close()

        # A reply starting with '<' is treated as "no collection found"
        # (presumably an HTML page rather than serialized PHP data).
        if len(data) > 0 and data[0] == '<':
            raise PubDBInfoNoCollectionError(self.Collections)

        try:
            catalogues = PHPUnserialize().unserialize(data)
        except IOError:
            raise PHPUnserializeError(data)

        try:
            collmap = {}
            for k in catalogues.keys():
                collection = catalogues[k]
                CollId = collection['CollectionId']
                ## get also the collection type
                CollType = collection['CollectionType']
                ## set primary collection flag
                PrimaryCollFlag = 0
                if CollId == self.PrimaryCollID:
                    PrimaryCollFlag = 1
                colllist = []

                cat = collection['Catalogue']
                for kcat in cat.keys():
                    catinfo = cat[kcat]
                    ContactString = catinfo['ContactString']
                    ContactProtocol = catinfo['ContactProtocol']
                    CatalogueType = catinfo['CatalogueType']
                    ValidationStatus = catinfo['ValidationStatus']
                    ce = catinfo['CEs']
                    CElist = [ce[kce] for kce in ce.keys()]

                    cc = catinfo['CatalogueContents']
                    for kcc in cc.keys():
                        content = cc[kcc]
                        FileType = content['FileType']
                        SE = content['SE']

                        # Start from an empty value for every entry so that
                        # an empty variables mapping cannot reuse the value
                        # left over from a previous catalogue entry.
                        Variables = ''
                        variables = content['Variables']
                        if variables is not None:
                            # NOTE(review): with several variables only the
                            # last "name=value" pair visited is kept.
                            for kvar in variables.keys():
                                Variables = kvar + "=" + variables[kvar]

                        # NOTE(review): with several run ranges only the last
                        # one visited is kept; with none, the three values
                        # below are unbound or left over from an earlier entry.
                        run = content['RunRange']
                        for krun in run.keys():
                            TotalEvents = self._reTotalEvents.search(run[krun]).group(1)
                            FirstRun = self._reFirstRun.search(run[krun]).group(1)
                            LastRun = self._reLastRun.search(run[krun]).group(1)

                        ## fill a catalogue entry, storing also the collection
                        ## type and the primary-collection flag
                        acatalogue = catalogEntryNew(
                            CollType, PrimaryCollFlag, FileType,
                            ValidationStatus, ContactString, ContactProtocol,
                            CatalogueType, SE, CElist, TotalEvents,
                            FirstRun + '-' + LastRun, Variables)

                        ## list the catalogues belonging to a given collection
                        colllist.append(acatalogue)

                ## dictionary grouping catalogues by CollectionID
                collmap[CollId] = colllist

        except IOError:
            raise PHPUnserializeError(data)

        return collmap

    ########################################################################
    def SelectBestPubDBInfo(self, cataloglist):
        """
        Select the list of needed catalogues: among the catalogues referring
        to the same collection and FileType, keep only the best one
        according to its access protocol.

        cataloglist maps a collection id to a list of catalogue entries,
        each exposing FileType and ContactProtocol attributes.

        Returns a flat list with one catalogue per (collection, FileType);
        within a collection the file types appear in first-seen order.
        """
        selectcatalogues = []

        ### for each collection
        for collid in cataloglist.keys():
            ### group the catalogues by FileType, remembering the order in
            ### which the file types were first seen
            filetypes = []
            ftmap = {}
            for catalog in cataloglist[collid]:
                if catalog.FileType not in ftmap:
                    filetypes.append(catalog.FileType)
                    ftmap[catalog.FileType] = []
                ftmap[catalog.FileType].append(catalog)

            ### select only one catalogue among those with the same FileType
            for ft in filetypes:
                selectcatalogues.append(self.SelectBestCatalog(ftmap[ft]))

        ### return the minimal list of needed catalogues
        return selectcatalogues

    ####################################################
    def SelectBestCatalog(self, ftcat):
        """
        From a set of catalogues with the same FileType and different access
        protocols, select one according to the protocol priority list.

        Returns the single catalogue if there is only one; otherwise the
        first catalogue whose ContactProtocol matches the most preferred
        protocol in self.protocolPrio_. If no catalogue uses a prioritised
        protocol the first catalogue is returned, and None is returned for
        an empty list.
        """
        if not ftcat:
            return None

        ### if just one catalogue, just return it!
        if len(ftcat) == 1:
            return ftcat[0]

        ### otherwise select the best catalogue based on protocol
        for prot in self.protocolPrio_:
            for cat in ftcat:
                if cat.ContactProtocol == prot:
                    return cat

        # No catalogue uses a known protocol: fall back to the first one,
        # consistent with the single-catalogue case, instead of handing the
        # caller an implicit None.
        return ftcat[0]
184
185 ### if just one catalog, just return it!
186 if (len(ftcat)==1):
187 #print '----- Just one catalogue, no selection based on protocol needed'
188 #ftcat[0].dump()
189 #print '---------------------'
190 return ftcat[0]
191 ### oterwise select the best catalogue based on protocol
192 for prot in sortedProtocols:
193 for cat in ftcat:
194 if cat.ContactProtocol==prot:
195 #print '----- Catalogue selected based on protocol : '+prot
196 #cat.dump()
197 #print '---------------------'
198 return cat
199