1 |
#!/usr/bin/env python
|
2 |
import sys, os, string, re
|
3 |
import urllib, urllister
|
4 |
import urllib2
|
5 |
from UnserializePHP import *
|
6 |
from orcarcBuilder import *
|
7 |
|
8 |
class PubDBInfoError(Exception):
    """Raised when the PubDB interface cannot be accessed.

    The error text is printed on construction (as the class has always done)
    and is also stored as the exception message, so that ``str(err)`` is
    meaningful to whoever catches it.

    Parameters:
        Collections -- string identifying the requested collections; it is
                       concatenated into the message.
    """

    def __init__(self, Collections):
        message = '\nERROR accessing PubDB for Collections: '+Collections+'\n'
        Exception.__init__(self, message)
        # Keep the historical side effect: report the problem immediately.
        print(message)
|
12 |
class PubDBInfoNoCollectionError(Exception):
    """Raised when PubDB reports no entry for the requested collections.

    The error text is printed on construction (as the class has always done)
    and is also stored as the exception message.

    Parameters:
        Collections -- string identifying the requested collections; it is
                       concatenated into the message.
    """

    def __init__(self, Collections):
        message = '\nERROR No Collections found in PubDB : '+Collections+'\n'
        Exception.__init__(self, message)
        # Keep the historical side effect: report the problem immediately.
        print(message)
|
16 |
class NoPHPError(Exception):
    """Raised when the PHP page at ``url`` is not the expected (updated) version.

    The error text is printed on construction (as the class has always done)
    and is also stored as the exception message.

    Parameters:
        url -- address of the PHP page; converted with ``str`` for the message.
    """

    def __init__(self, url):
        # The pieces are joined with single spaces, which reproduces what the
        # former comma-separated print statement wrote.
        message = 'ERROR accessing PHP: ' + ' ' + str(url) + ' ' + 'isn\'t updated version \n'
        Exception.__init__(self, message)
        print(message)
|
21 |
class PubDBInfoResult:
    """Simple holder that keeps the ``contents`` object it is given."""

    def __init__(self, contents):
        # Stored untouched; no copy or validation is performed.
        self.contents = contents
|
25 |
|
26 |
################################################################################
|
27 |
# Class to connect to PubDB interface for the analysis and download the data in one shot using the serialized PHP data.
|
28 |
################################################################################
|
29 |
class PubDBInfo:
    """Client for the PubDB analysis interface.

    Fetches, in one request, the serialized-PHP description of the catalogues
    published for a set of collections, and selects for every collection and
    file type a single catalogue according to the access-protocol priority.
    """

    # Patterns extracting the numeric fields from a run-range string.
    # Compiled once here instead of on every loop iteration.
    _TOTAL_EVENTS_RE = re.compile(r'TotalEvents=(\d*)')
    _FIRST_RUN_RE = re.compile(r'FirstRun=(\d*)')
    _LAST_RUN_RE = re.compile(r'LastRun=(\d*)')

    def __init__(self, pubdburl, Collections):
        """Remember the PubDB base URL and the requested collections.

        Parameters:
            pubdburl    -- base URL; the PHP page name is appended to it as is.
            Collections -- '-'-separated string of collection ids; the part
                           before the first '-' is kept as the primary
                           collection id.
        """
        self.Collections = Collections
        self.PrimaryCollID = Collections.split('-')[0]
        self.PubDBurl_ = pubdburl
        self.PubDBInfophp_ = 'pubdb-get-analysisinfo.php'
        # Access protocols, most preferred first.
        self.protocolPrio_ = ['http', 'rfio', 'mysql', 'gridftp']

    ##########################################################################
    def GetPubDBInfo(self):
        """
        Get all the catalogue-related info from PubDB and select the best
        ones if multiple choices are possible.

        Returns the list produced by SelectBestPubDBInfo.
        """
        ### extract catalogue-related info from PubDB
        cataloguecoll_map = self.ExtractPubDBInfo()
        ### select the best catalogues
        return self.SelectBestPubDBInfo(cataloguecoll_map)

    #########################################################################
    def ExtractPubDBInfo(self):
        """
        Extract all the information from the PubDB analysis interface.

        Returns a dictionary mapping each CollectionId to the list of
        catalogue entries (built with catalogEntryNew) found for it.

        Raises:
            PubDBInfoError             -- opening the URL raised IOError.
            PubDBInfoNoCollectionError -- the reply starts with '<'.
            PHPUnserializeError        -- IOError while decoding the reply.
        """
        url = self.PubDBurl_ + self.PubDBInfophp_ + '?collid=' + self.Collections
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise PubDBInfoError(self.Collections)

        # Always release the connection, even if reading fails.
        try:
            data = f.read()
        finally:
            f.close()

        # A reply beginning with '<' is treated as "no collection found".
        if data and data[0] == '<':
            raise PubDBInfoNoCollectionError(self.Collections)

        try:
            catalogues = PHPUnserialize().unserialize(data)
        except IOError:
            raise PHPUnserializeError(data)

        # NOTE(review): the loop nesting below was reconstructed from a
        # flattened listing (one entry per catalogue content, appended to the
        # collection's list) -- confirm against the original layout.
        try:
            collmap = {}
            for k in catalogues.keys():
                collection = catalogues[k]
                CollId = collection['CollectionId']
                ## get also the collection type
                CollType = collection['CollectionType']
                ## set primary collection flag (1 or 0)
                PrimaryCollFlag = int(CollId == self.PrimaryCollID)
                colllist = []

                cat = collection['Catalogue']
                for kcat in cat.keys():
                    catalogue = cat[kcat]
                    ContactString = catalogue['ContactString']
                    ContactProtocol = catalogue['ContactProtocol']
                    CatalogueType = catalogue['CatalogueType']
                    ValidationStatus = catalogue['ValidationStatus']
                    ce = catalogue['CEs']
                    CElist = [ce[kce] for kce in ce.keys()]

                    cc = catalogue['CatalogueContents']
                    for kcc in cc.keys():
                        content = cc[kcc]
                        FileType = content['FileType']
                        SE = content['SE']

                        # Start from '' so that an empty Variables mapping
                        # cannot leak the value of a previous content.
                        Variables = ''
                        if content['Variables'] is not None:
                            # Only the last "name=value" pair visited is kept.
                            for kvar in content['Variables'].keys():
                                Variables = kvar + "=" + content['Variables'][kvar]

                        run = content['RunRange']
                        for krun in run.keys():
                            # Values of the last run range visited are used.
                            TotalEvents = self._TOTAL_EVENTS_RE.search(run[krun]).group(1)
                            FirstRun = self._FIRST_RUN_RE.search(run[krun]).group(1)
                            LastRun = self._LAST_RUN_RE.search(run[krun]).group(1)

                        ## fill a catalogue entry, storing also the collection
                        ## type and the primary-collection flag
                        acatalogue = catalogEntryNew(CollType, PrimaryCollFlag, FileType, ValidationStatus, ContactString, ContactProtocol, CatalogueType, SE, CElist, TotalEvents, FirstRun+'-'+LastRun, Variables)

                        ## list the catalogues belonging to a given collection
                        colllist.append(acatalogue)

                ## dictionary grouping catalogues by CollectionId
                collmap[CollId] = colllist

        except IOError:
            raise PHPUnserializeError(data)

        return collmap

    ########################################################################
    def SelectBestPubDBInfo(self, cataloglist):
        """
        Select the list of needed catalogues: from each set of catalogues
        referring to the same collection and FileType, pick one based on
        its access protocol.

        Parameters:
            cataloglist -- dictionary mapping a collection id to a list of
                           objects exposing FileType and ContactProtocol.

        Returns a flat list with one SelectBestCatalog result per
        (collection, FileType) pair; within a collection the file types
        appear in the order they are first seen.
        """
        selectcatalogues = []

        ### for each collection
        for collid in cataloglist.keys():
            filetypes = []   # file types in first-seen order
            ftmap = {}       # FileType -> catalogues with that FileType
            for catalog in cataloglist[collid]:
                if catalog.FileType not in ftmap:
                    filetypes.append(catalog.FileType)
                    ftmap[catalog.FileType] = []
                ftmap[catalog.FileType].append(catalog)

            ### select only one catalogue among those with the same FileType
            for ft in filetypes:
                selectcatalogues.append(self.SelectBestCatalog(ftmap[ft]))

        ### return the minimal list of needed catalogues
        return selectcatalogues

    ####################################################
    def SelectBestCatalog(self, ftcat):
        """
        From a set of catalogues with the same FileType and different access
        protocols, select one according to the protocol priority.

        Parameters:
            ftcat -- list of objects exposing ContactProtocol.

        Returns the only element when the list has exactly one entry;
        otherwise the first catalogue whose protocol matches the highest
        priority protocol available, or None when no protocol matches
        (including an empty list).
        """
        ### if just one catalogue, just return it!
        if len(ftcat) == 1:
            return ftcat[0]

        ### otherwise select the best catalogue based on protocol
        for prot in self.protocolPrio_:
            for cat in ftcat:
                if cat.ContactProtocol == prot:
                    return cat

        # No catalogue uses a known protocol.
        return None
|
199 |
|