1 |
slacapra |
1.1 |
#!/usr/bin/env python
|
2 |
|
|
import sys, os, string, re
|
3 |
|
|
import urllib, urllister
|
4 |
|
|
import urllib2
|
5 |
|
|
from UnserializePHP import *
|
6 |
|
|
from orcarcBuilderNew import *
|
7 |
|
|
|
8 |
|
|
class PubDBInfoError(Exception):
    """Raised when the PubDB analysis interface cannot be contacted.

    Deriving from Exception lets the class be raised and caught like any
    other error and gives it a readable message. The error is still
    printed to stdout on construction, exactly as before, because
    existing callers may rely on that output.

    Collections -- the collection id that was being queried; kept on the
                   instance as ``Collections``.
    """

    def __init__(self, Collections):
        message = 'ERROR accessing PubDB for Collections: %s' % (Collections,)
        Exception.__init__(self, message)
        self.Collections = Collections
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as the Python 3 function.
        print('\n' + message + '\n')
|
12 |
|
|
class PubDBInfoNoCollectionError(Exception):
    """Raised when PubDB answers but reports no matching collection.

    Deriving from Exception lets the class be raised and caught like any
    other error and gives it a readable message. The error is still
    printed to stdout on construction, exactly as before, because
    existing callers may rely on that output.

    Collections -- the collection id that was being queried; kept on the
                   instance as ``Collections``.
    """

    def __init__(self, Collections):
        message = 'ERROR No Collections found in PubDB : %s' % (Collections,)
        Exception.__init__(self, message)
        self.Collections = Collections
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as the Python 3 function.
        print('\n' + message + '\n')
|
16 |
|
|
class NoPHPError(Exception):
    """Raised when the PHP script at ``url`` is not the updated version.

    Deriving from Exception lets the class be raised and caught like any
    other error and gives it a readable message. The error is still
    printed to stdout on construction, exactly as before, because
    existing callers may rely on that output.

    url -- address of the PHP script; kept on the instance as ``url``.
    """

    def __init__(self, url):
        # Reproduces what the former comma-separated print statement
        # emitted, including the double space after the colon.
        text = "ERROR accessing PHP:  %s isn't updated version \n" % (url,)
        Exception.__init__(self, text.strip())
        self.url = url
        # Single-argument print(...) behaves identically as a Python 2
        # print statement and as the Python 3 function.
        print(text)
|
21 |
|
|
class PubDBInfoResult:
    """Minimal container exposing a PubDB result as ``contents``."""

    def __init__(self, contents):
        # The value is stored as given: no copy and no validation.
        self.contents = contents
|
25 |
|
|
|
26 |
|
|
################################################################################
|
27 |
|
|
# Class to connect to PubDB interface for the analysis and download the data in one shot using the serialized PHP data.
|
28 |
|
|
################################################################################
|
29 |
|
|
class PubDBInfo:
    """Client for the PubDB analysis interface.

    Fetches, in one request, the PHP-serialized catalogue description of
    a collection, turns it into catalogue entries and, when several
    catalogues are available for the same FileType, keeps the one with
    the preferred access protocol.
    """

    # Compiled once instead of on every run-range iteration.
    _reTotalEvents = re.compile(r'TotalEvents=(\d*)')
    _reFirstRun = re.compile(r'FirstRun=(\d*)')
    _reLastRun = re.compile(r'LastRun=(\d*)')

    def __init__(self, pubdburl, Collections):
        """Remember where PubDB lives and which collection to ask for.

        pubdburl    -- base URL; the PHP script name is appended to it
                       verbatim, so it is expected to end with '/'
        Collections -- value sent as the ``collid`` query parameter
        """
        self.Collections = Collections
        self.PubDBurl_ = pubdburl
        self.PubDBInfophp_ = 'pubdb-get-analysisinfo.php'
        # Access protocols, most preferred first (used by SelectBestCatalog).
        self.protocolPrio_ = ['http', 'rfio', 'mysql', 'gridftp']

    ##########################################################################
    def GetPubDBInfo(self):
        """Return the minimal list of catalogues needed for the collection.

        Extracts all catalogue information from PubDB and, where several
        catalogues are possible, selects the best one.
        """
        cataloguecoll_map = self.ExtractPubDBInfo()
        return self.SelectBestPubDBInfo(cataloguecoll_map)

    #########################################################################
    def _FormatVariables(self, variables):
        """Return the 'name=value' text for a Variables mapping.

        None and an empty mapping both give ''. Previously an empty
        mapping left the value unbound or carried over from the previous
        catalogue. When several variables are present only the last one
        iterated is kept, as before.
        """
        text = ''
        if variables is not None:
            for kvar in variables.keys():
                text = kvar + "=" + variables[kvar]
        return text

    def _ParseRunRange(self, runinfo):
        """Return (TotalEvents, FirstRun, LastRun) digit strings from runinfo.

        Raises AttributeError when one of the three fields is missing.
        """
        TotalEvents = self._reTotalEvents.search(runinfo).group(1)
        FirstRun = self._reFirstRun.search(runinfo).group(1)
        LastRun = self._reLastRun.search(runinfo).group(1)
        return TotalEvents, FirstRun, LastRun

    def ExtractPubDBInfo(self):
        """Extract all the information from the PubDB analysis interface.

        Returns a dictionary mapping each CollectionId to the list of
        catalogue entries (built with catalogEntryNew) found for it.

        Raises PubDBInfoError when the URL cannot be opened,
        PubDBInfoNoCollectionError when the reply starts with '<', and
        PHPUnserializeError when decoding fails with an IOError.
        """
        url = self.PubDBurl_ + self.PubDBInfophp_ + '?collid=' + self.Collections
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise PubDBInfoError(self.Collections)

        # Release the connection even when reading fails.
        try:
            data = f.read()
        finally:
            f.close()

        # A reply starting with '<' is markup, not serialized PHP data.
        if len(data) > 0 and data[0] == '<':
            raise PubDBInfoNoCollectionError(self.Collections)

        try:
            catalogues = PHPUnserialize().unserialize(data)
        except IOError:
            raise PHPUnserializeError(data)

        try:
            collmap = {}
            for k in catalogues.keys():
                CollId = catalogues[k]['CollectionId']
                colllist = []

                cat = catalogues[k]['Catalogue']
                for kcat in cat.keys():
                    ContactString = cat[kcat]['ContactString']
                    ContactProtocol = cat[kcat]['ContactProtocol']
                    CatalogueType = cat[kcat]['CatalogueType']
                    ValidationStatus = cat[kcat]['ValidationStatus']
                    ce = cat[kcat]['CEs']
                    CElist = [ce[kce] for kce in ce.keys()]

                    cc = cat[kcat]['CatalogueContents']
                    for kcc in cc.keys():
                        FileType = cc[kcc]['FileType']
                        SE = cc[kcc]['SE']
                        Variables = self._FormatVariables(cc[kcc]['Variables'])
                        run = cc[kcc]['RunRange']
                        for krun in run.keys():
                            TotalEvents, FirstRun, LastRun = \
                                self._ParseRunRange(run[krun])
                            # NOTE(review): the nesting was rebuilt from a
                            # listing that had lost its indentation; one
                            # entry per run range is assumed here. This is
                            # the same as one entry per CatalogueContents
                            # whenever there is a single run range. Confirm
                            # against the repository copy.
                            ## fill a catalogue entry
                            acatalogue = catalogEntryNew(
                                FileType, ValidationStatus, ContactString,
                                ContactProtocol, CatalogueType, SE, CElist,
                                TotalEvents, FirstRun + '-' + LastRun,
                                Variables)
                            ## list the catalogues belonging to a given collection
                            colllist.append(acatalogue)

                ## dictionary grouping catalogues by CollectionID
                collmap[CollId] = colllist

        except IOError:
            raise PHPUnserializeError(data)

        return collmap

    ########################################################################
    def SelectBestPubDBInfo(self, cataloglist):
        """Select the list of needed catalogues.

        cataloglist maps a collection id to its catalogue entries. For
        each collection the entries are grouped by FileType and, within
        each group, the best one according to the access protocol is
        kept (see SelectBestCatalog).

        Returns a flat list with one item per (collection, FileType).
        An item is None when SelectBestCatalog finds no catalogue with a
        known protocol.
        """
        selectcatalogues = []

        ### for each collection
        for collid in cataloglist.keys():
            ### dictionary grouping catalogues by FileType, filled in one pass
            ftmap = {}
            for catalog in cataloglist[collid]:
                ftmap.setdefault(catalog.FileType, []).append(catalog)
            ### select only one catalogue among those with the same FileType
            for ft in ftmap.keys():
                selectcatalogues.append(self.SelectBestCatalog(ftmap[ft]))

        ### return the minimal list of needed catalogues
        return selectcatalogues

    ####################################################
    def SelectBestCatalog(self, ftcat):
        """Pick one catalogue among several sharing the same FileType.

        A single candidate is returned as is, whatever its protocol.
        Otherwise the first candidate whose ContactProtocol matches the
        highest-priority entry of ``protocolPrio_`` is returned.

        Returns None when ftcat is empty or when, among several
        candidates, none uses a protocol listed in ``protocolPrio_``.
        """
        ### if just one catalog, just return it!
        if len(ftcat) == 1:
            return ftcat[0]
        ### otherwise select the best catalogue based on protocol
        for prot in self.protocolPrio_:
            for cat in ftcat:
                if cat.ContactProtocol == prot:
                    return cat
        # Nothing matched: made explicit, this used to be an implicit
        # fall-through.
        return None
|
192 |
|
|
|