ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PubDB.py
Revision: 1.1
Committed: Tue Jun 7 07:21:04 2005 UTC (19 years, 10 months ago) by nsmirnov
Content type: text/x-python
Branch: MAIN
Log Message:
pubdb.py renamed to PubDB.py

File Contents

# Content
1 #!/usr/bin/env python
2 import sys, os, string, re
3 import urllib, urllister
4 import urllib2
5
6 # ####################################
7 # Exception with use of refDB
class RefDBError(Exception):
    """Raised when RefDB cannot be queried for an Owner/Dataset pair.

    The class derives from Exception so that it is a genuine exception
    type: it can be caught by generic handlers and str() of an instance
    yields the error message.

    Attributes:
        owner   -- owner name the failed lookup was made for
        dataset -- dataset name the failed lookup was made for
    """

    def __init__(self, owner, dataset):
        message = ('ERROR accessing RefDB for Owner/Dataset: '
                   + owner + '/' + dataset)
        Exception.__init__(self, message)
        self.owner = owner
        self.dataset = dataset
        # Creating the error has always reported it on stdout; the output
        # is kept byte-identical so existing callers see no difference.
        # A parenthesised single argument prints the same whether print
        # is a statement or a function.
        print('\n' + message + '\n')
12
13 # ####################################
class PubDBError(Exception):
    """Raised when a PubDB/RefDB page cannot be fetched or understood.

    The class derives from Exception so that it is a genuine exception
    type: it can be caught by generic handlers and str() of an instance
    yields the error message.

    Attributes:
        url -- the address whose access failed
    """

    def __init__(self, url):
        message = 'ERROR accessing PubDB at ' + url
        Exception.__init__(self, message)
        self.url = url
        # Creating the error has always reported it on stdout; the output
        # is kept byte-identical so existing callers see no difference.
        print('\n' + message + '\n')
18
19 # ####################################
class NoPHPError(Exception):
    """Raised when a PubDB does not serve the expected analysis PHP page.

    The class derives from Exception so that it is a genuine exception
    type: it can be caught by generic handlers and str() of an instance
    yields the error message.

    Attributes:
        url -- the address of the PHP page that could not be used
    """

    def __init__(self, url):
        # The two blanks after the colon are deliberate: the original
        # comma-separated print emitted the label's own trailing blank
        # plus a separator blank, and the stdout report must not change.
        message = 'ERROR accessing PHP:  %s isn\'t updated version' % (url,)
        Exception.__init__(self, message)
        self.url = url
        print(message + ' \n')
25
26 # ####################################
class pubDBResult:
    """Holder for the payload obtained from one PubDB.

    Attributes:
        contents -- the value handed to the constructor, stored unchanged
    """

    def __init__(self, contents):
        self.contents = contents

    def dump(self):
        """Print the stored contents to stdout after a 'Contents : ' label."""
        # Label and value are separated by one blank, which is what the
        # comma form of the print statement produced.
        print('%s %s' % ('Contents : ', self.contents))
35
36 # ####################################
37 # class to access PubDBs
class PubDB:
    """Resolve a dataset in RefDB and gather what the PubDBs publish for it.

    Creating an instance immediately contacts RefDB (see findAllCollId)
    and stores the resulting list of collection ids in ``self.collid``:
    the first entry belongs to the requested Owner/Dataset, the following
    ones are the parent collections matching the requested data tiers.
    """

    def __init__(self, owner, dataset, dataTiers):
        """Store the request and resolve its collection ids through RefDB.

        owner, dataset -- strings identifying the collection in RefDB
        dataTiers      -- sequence of parent data type names that are
                          needed as well (may be empty)

        Raises RefDBError when the RefDB lookup fails.
        """
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers

        self.RefDBurl_ = 'http://cmsdoc.cern.ch/cms/production/www/'
        self.RefDBphp_ = 'PubDB/GetIdCollection.php'
        self.RefDBMotherphp_ = 'cgi/SQL/CollectionTree.php'

        self.PubDBCentralUrl_ = 'http://cmsdoc.cern.ch/cms/production/www/PubDB/'
        self.PubDBCentralPhp_ = 'GetPublishedCollectionInfoFromRefDB.php'

        self.PubDBAnalysisPhp_ = 'get-pubdb-analysisinfo.php'
        # Not read anywhere in this class; kept because it is part of the
        # instance's visible state.
        self.PubDBAnalysisPhpOld_ = 'get-pubdb-analysisinfo.php'

        try:
            self.collid = self.findAllCollId()
        except PubDBError:
            # Failures while talking to RefDB are reported to the caller
            # as a RefDB problem for this Owner/Dataset.
            raise RefDBError(self.owner, self.dataset)

    ########################################################################
    def _fetchPage(self, url):
        """Return the complete body served at url.

        The connection is always closed, also when reading fails.
        Raises PubDBError when opening or reading raises IOError.
        """
        try:
            sock = urllib.urlopen(url)
            try:
                return sock.read()
            finally:
                sock.close()
        except IOError:
            raise PubDBError(url)

    ########################################################################
    def _analysisUrl(self, url):
        """Build the analysis-info URL that corresponds to a PubDB link.

        Everything up to the last '/' of url is kept, the analysis PHP
        script name is appended, and whatever follows the last '=' of url
        is passed on as the CollID parameter.
        """
        directory = url[:url.rfind('/') + 1]
        collId = url[url.rfind('=') + 1:]
        return directory + self.PubDBAnalysisPhp_ + '?CollID=' + collId

    ########################################################################
    def findAllCollId(self):
        """Return the primary collection id followed by the needed parents.

        The primary id comes from findCollId.  When data tiers were
        requested, the collection tree is climbed through
        findMotherCollId: at each level every requested tier contributes
        the id of the first parent row whose type (field 2) equals it.
        Climbing stops when the first parent row is of type 'PU' or when
        no parent rows are left.

        The resulting list is also printed to stdout.
        Raises PubDBError when RefDB cannot be queried.
        """
        collId = self.findCollId()
        neededCollId = [collId]

        if self.dataTiers:
            collInfos = self.findMotherCollId(collId)
            # Row 0 is never inspected, only rows 1.. are treated as
            # parents (presumably row 0 describes the collection itself).
            # Checking the length here also covers a tree without any
            # parent row, which used to fail with IndexError.
            while len(collInfos) > 1 and collInfos[1][2] != 'PU':
                lastSeen = None
                for tier in self.dataTiers:
                    for info in collInfos[1:]:
                        lastSeen = info
                        if tier == info[2]:
                            neededCollId.append(info[0])
                            break
                # The next level is looked up from the last row examined
                # for the last requested tier.
                collInfos = self.findMotherCollId(lastSeen[0])

        print(neededCollId)
        return neededCollId

    ########################################################################
    def findCollId(self):
        """
        Contact RefDB and get CollId given Dataset and Owner

        The id is the text between the first ': ' of the reply and the
        following '<', with surrounding whitespace removed.  It is also
        printed to stdout.
        Raises PubDBError when RefDB cannot be read or the reply does not
        contain ': '.
        """
        # Original note (translated): this information also comes from
        # the cfg; it is the central PubDB.
        url = (self.RefDBurl_ + self.RefDBphp_
               + '?Owner=' + self.owner + '&Dataset=' + self.dataset)
        page = self._fetchPage(url)

        try:
            collid = page.split(': ')[1].split('<')[0].strip()
        except IndexError:
            raise PubDBError(url)

        print('CollectionId: ' + collid + ' \n')
        return collid

    ########################################################################
    def findMotherCollId(self, collid):
        """
        Contact RefDB and get CollId of mother of current Dataset Owner
        (eg. Digi if DST, Hit if Digi)

        Returns one list per non-empty line of the reply.  A line is a
        comma separated sequence of key=value fields; only the values are
        kept, in order.
        Raises PubDBError when RefDB cannot be read or a field has no '='.
        """
        url = self.RefDBurl_ + self.RefDBMotherphp_ + '?cid=' + collid
        page = self._fetchPage(url)

        collInfos = []
        for rawLine in page.split('\n'):
            line = rawLine.strip()
            if not line:
                continue
            try:
                collInfos.append([field.split('=')[1]
                                  for field in line.split(',')])
            except IndexError:
                # Not a key=value line (e.g. an error page): report it the
                # same way findCollId reports an unparsable reply.
                raise PubDBError(url)

        return collInfos

    ########################################################################
    def getPubDBInfo(self, url):
        """
        Contact a local PubDB to collect all the relevant information

        url is a link as returned by findPubDBsUrls.  The reply of the
        matching analysis-info page is returned wrapped in a pubDBResult.
        Raises NoPHPError when the address contains 'V24' (an old PubDB
        version) or when the server answers with an HTTP error.
        """
        urlphp = self._analysisUrl(url)

        if 'V24' in urlphp:
            raise NoPHPError(urlphp)

        # urllib2 is used here because it raises HTTPError for error
        # status codes.
        try:
            f = urllib2.urlopen(urlphp)
        except urllib2.HTTPError:
            raise NoPHPError(urlphp)

        try:
            content = f.read()
        finally:
            f.close()
        return pubDBResult(content)

    ########################################################################
    def findPubDBsUrls(self):
        """
        Find the URL of the PubDB of all the sites which publish the collid

        Returns a list with one entry per usable site.  Each entry is a
        list of URLs: first the link for the primary collection, then the
        links for the parent collections as returned by checkPubDBs.
        Sites for which checkPubDBs raises PubDBError are left out.
        Raises PubDBError when the central PubDB page cannot be read.
        """
        # The first collection id is the primary one (Dataset/Owner asked
        # by the user), the others are the parents.
        primaryCollId = self.collid[0]
        primaryUrl = (self.PubDBCentralUrl_ + self.PubDBCentralPhp_
                      + '?CollID=' + primaryCollId)
        page = self._fetchPage(primaryUrl)

        parser = urllister.URLLister()
        parser.feed(page)
        parser.close()

        completeResult = []
        # linksList holds all the href links found in the page; only those
        # mentioning the primary collection id are of interest.
        for url in parser.linksList:
            if primaryCollId not in url:
                continue
            try:
                parentUrls = self.checkPubDBs(url)
            except PubDBError:
                continue
            completeResult.append([url] + parentUrls)

        return completeResult

    #########################################################
    def checkPubDBs(self, url):
        """
        Check if the given PubDB contains also the CollId's collections

        For every parent collection id the primary id inside url is
        replaced and the resulting page is fetched.  Returns the list of
        these URLs.
        Raises PubDBError when a page cannot be read or reports
        'no such collection' or 'DB Error: connect failed'.
        """
        primaryCollId = self.collid[0]

        result = []
        for collid in self.collid[1:]:
            newurl = url.replace(primaryCollId, collid)
            page = self._fetchPage(newurl)
            # Neither marker contains a newline, so searching the whole
            # page is the same as searching it line by line.
            if ('no such collection' in page
                    or 'DB Error: connect failed' in page):
                raise PubDBError(newurl)
            result.append(newurl)

        return result

    #########################################################
    def getAllPubDBsInfo(self):
        """
        Collect the information published by every local PubDB

        Returns a list with one entry per site found by findPubDBsUrls.
        Each entry is the list of pubDBResult objects obtained from that
        site's URLs.  URLs for which getPubDBInfo raises NoPHPError are
        skipped, and sites left without any result are dropped.
        """
        completeResult = []
        for pubDBUrls in self.findPubDBsUrls():
            result = []
            for url in pubDBUrls:
                try:
                    result.append(self.getPubDBInfo(url))
                except NoPHPError:
                    continue
            if result:
                completeResult.append(result)

        return completeResult