ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PubDBInfo.py
Revision: 1.1
Committed: Tue Aug 23 10:57:51 2005 UTC (19 years, 8 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
Log Message:
add files for pre2

File Contents

# Content
1 #!/usr/bin/env python
2 import sys, os, string, re
3 import urllib, urllister
4 import urllib2
5 from UnserializePHP import *
6 from orcarcBuilderNew import *
7
class PubDBInfoError(Exception):
    """Error signalling that PubDB could not be accessed.

    Deriving from ``Exception`` lets the class be raised and caught like any
    other exception. The message is printed when the instance is created, so
    simply raising it reports the problem on standard output.

    Parameters:
        Collections: string identifying the requested collections; it is
            concatenated into the message, so it must be a string.
    """

    def __init__(self, Collections):
        message = '\nERROR accessing PubDB for Collections: ' + Collections + '\n'
        Exception.__init__(self, message)
        # Kept so that handlers can inspect what was being requested.
        self.Collections = Collections
        print(message)
class PubDBInfoNoCollectionError(Exception):
    """Error signalling that PubDB returned no collections.

    Deriving from ``Exception`` lets the class be raised and caught like any
    other exception. The message is printed when the instance is created, so
    simply raising it reports the problem on standard output.

    Parameters:
        Collections: string identifying the requested collections; it is
            concatenated into the message, so it must be a string.
    """

    def __init__(self, Collections):
        message = '\nERROR No Collections found in PubDB : ' + Collections + '\n'
        Exception.__init__(self, message)
        # Kept so that handlers can inspect what was being requested.
        self.Collections = Collections
        print(message)
class NoPHPError(Exception):
    """Error signalling that the PHP page at a URL is not the updated version.

    Deriving from ``Exception`` lets the class be raised and caught like any
    other exception. The message is printed when the instance is created.

    Parameters:
        url: address of the PHP page; it is formatted with ``%s``.
    """

    def __init__(self, url):
        # The double space after the colon and the trailing newline are
        # deliberate: they keep the emitted text identical to what a
        # comma-separated print of the three pieces produces.
        message = "ERROR accessing PHP:  %s isn't updated version \n" % (url,)
        Exception.__init__(self, message)
        # Kept so that handlers can inspect which page was contacted.
        self.url = url
        print(message)
class PubDBInfoResult:
    """Plain holder exposing the value it was built with as ``contents``."""

    def __init__(self, contents):
        # Stored as-is: no copy and no validation is performed.
        self.contents = contents
25
################################################################################
# Class that connects to the PubDB analysis interface and downloads all the
# data in one shot as serialized PHP.
################################################################################
class PubDBInfo:
    """Retrieve catalogue information from PubDB and pick the catalogues to use.

    The information is downloaded in one request as serialized PHP, turned
    into catalogue entries grouped by collection, and then reduced to one
    catalogue per (collection, FileType) according to a protocol priority.
    """

    # Compiled once: they are applied to every RunRange string that is parsed.
    _reTotalEvents = re.compile(r'TotalEvents=(\d*)')
    _reFirstRun = re.compile(r'FirstRun=(\d*)')
    _reLastRun = re.compile(r'LastRun=(\d*)')

    def __init__(self, pubdburl, Collections):
        """Store the connection parameters.

        Parameters:
            pubdburl: base URL of PubDB; the PHP page name is appended to it
                without any separator, so it must end where the page name
                should start.
            Collections: string placed after ``?collid=`` in the request.
        """
        self.Collections = Collections
        self.PubDBurl_ = pubdburl
        self.PubDBInfophp_ = 'pubdb-get-analysisinfo.php'
        # Access protocols, most preferred first.
        self.protocolPrio_ = ['http', 'rfio', 'mysql', 'gridftp']

    ##########################################################################
    def GetPubDBInfo(self):
        """
        Get all the catalogue-related info from PubDB and select the best
        catalogues when multiple choices are possible.

        Returns the list produced by SelectBestPubDBInfo.
        """
        ### extract catalogue-related info from PubDB
        cataloguecoll_map = self.ExtractPubDBInfo()
        ### select the best catalogues
        return self.SelectBestPubDBInfo(cataloguecoll_map)

    #########################################################################
    def ExtractPubDBInfo(self):
        """
        Extract all the information from the PubDB analysis interface.

        Returns a dictionary mapping each CollectionId to the list of
        catalogue entries (built with catalogEntryNew) of that collection.

        Raises PubDBInfoError when the URL cannot be opened,
        PubDBInfoNoCollectionError when the reply starts with '<', and
        PHPUnserializeError when an IOError occurs while decoding the reply.
        """
        url = self.PubDBurl_ + self.PubDBInfophp_ + '?collid=' + self.Collections
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise PubDBInfoError(self.Collections)
        # Always release the connection, even if reading fails.
        try:
            data = f.read()
        finally:
            f.close()

        # A reply starting with '<' is treated as "no collection found"
        # (presumably markup rather than serialized data).
        if data[:1] == '<':
            raise PubDBInfoNoCollectionError(self.Collections)

        try:
            catalogues = PHPUnserialize().unserialize(data)
        except IOError:
            raise PHPUnserializeError(data)

        # NOTE(review): the loop nesting below was reconstructed from the
        # statement order: one catalogue entry per CatalogueContents item and
        # one map entry per collection. Confirm against the repository copy.
        try:
            collmap = {}
            for k in catalogues.keys():
                collection = catalogues[k]
                CollId = collection['CollectionId']
                colllist = []

                cat = collection['Catalogue']
                for kcat in cat.keys():
                    catalogue = cat[kcat]
                    ContactString = catalogue['ContactString']
                    ContactProtocol = catalogue['ContactProtocol']
                    CatalogueType = catalogue['CatalogueType']
                    ValidationStatus = catalogue['ValidationStatus']
                    ce = catalogue['CEs']
                    CElist = [ce[kce] for kce in ce.keys()]

                    cc = catalogue['CatalogueContents']
                    for kcc in cc.keys():
                        content = cc[kcc]
                        FileType = content['FileType']
                        SE = content['SE']
                        Variables = self._FormatVariables(content['Variables'])
                        TotalEvents, FirstRun, LastRun = self._ParseRunRange(content['RunRange'])

                        ## fill a catalogue entry
                        acatalogue = catalogEntryNew(FileType, ValidationStatus, ContactString, ContactProtocol, CatalogueType, SE, CElist, TotalEvents, FirstRun + '-' + LastRun, Variables)
                        ## list the catalogues belonging to a given collection
                        colllist.append(acatalogue)

                ## dictionary grouping catalogues by CollectionId
                collmap[CollId] = colllist

        except IOError:
            raise PHPUnserializeError(data)

        return collmap

    def _FormatVariables(self, variables):
        """Return the 'name=value' string for a Variables entry, or ''."""
        # Start from '' so that an empty dictionary cannot leave the result
        # undefined or carry over the value of a previously parsed entry.
        formatted = ''
        if variables is not None:
            # NOTE(review): when several variables are present only the last
            # pair iterated is kept; confirm whether they should be combined.
            for kvar in variables.keys():
                formatted = kvar + "=" + variables[kvar]
        return formatted

    def _ParseRunRange(self, run):
        """Return (TotalEvents, FirstRun, LastRun) strings parsed from *run*.

        *run* maps keys to strings containing 'TotalEvents=', 'FirstRun='
        and 'LastRun=' fields. When it holds several strings the values of
        the last one iterated are returned; when it is empty all three are
        ''. A string lacking one of the fields raises AttributeError.
        """
        TotalEvents = FirstRun = LastRun = ''
        for krun in run.keys():
            text = run[krun]
            TotalEvents = self._reTotalEvents.search(text).group(1)
            FirstRun = self._reFirstRun.search(text).group(1)
            LastRun = self._reLastRun.search(text).group(1)
        return TotalEvents, FirstRun, LastRun

    ########################################################################
    def SelectBestPubDBInfo(self, cataloglist):
        """
        Select the list of needed catalogues: from the catalogues referring
        to the same collection and FileType, keep the best one based on
        protocol.

        cataloglist maps a collection id to a list of catalogue entries.
        Returns a flat list with one item per (collection, FileType); an item
        is None when SelectBestCatalog finds no catalogue with a known
        protocol.
        """
        selectcatalogues = []

        ### for each collection
        for collid in cataloglist.keys():
            ### group the catalogues by FileType, remembering first-seen order
            filetypes = []
            ftmap = {}
            for catalog in cataloglist[collid]:
                if catalog.FileType not in ftmap:
                    ftmap[catalog.FileType] = []
                    filetypes.append(catalog.FileType)
                ftmap[catalog.FileType].append(catalog)
            ### select only one catalogue among those with the same FileType
            for ft in filetypes:
                selectcatalogues.append(self.SelectBestCatalog(ftmap[ft]))

        ### return the minimal list of needed catalogues
        return selectcatalogues

    ####################################################
    def SelectBestCatalog(self, ftcat):
        """
        From a set of catalogues with the same FileType and different access
        protocols, select one according to the protocol priority.

        A single catalogue is returned as-is, whatever its protocol.
        Otherwise the first catalogue using the most preferred protocol is
        returned, or None when ftcat is empty or no catalogue uses a protocol
        listed in self.protocolPrio_.
        """
        ### if just one catalogue, just return it!
        if len(ftcat) == 1:
            return ftcat[0]
        ### otherwise select the best catalogue based on protocol
        for prot in self.protocolPrio_:
            for cat in ftcat:
                if cat.ContactProtocol == prot:
                    return cat
        return None
192