ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PubDB.py
Revision: 1.4
Committed: Wed Aug 10 16:52:51 2005 UTC (19 years, 8 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
Changes since 1.3: +8 -10 lines
Log Message:
more check on DB plus other

File Contents

# Content
1 #!/usr/bin/env python2
2 import sys, os, string, re
3 import urllib, urllister
4 import urllib2
5 import common
6 from RefDBInfo import RefDBInfo
7
8 # ####################################
class PubDBError(Exception):
    """
    Raised when a PubDB server cannot be reached.

    Constructing the exception also prints an error line naming the URL,
    so callers that only catch and continue still leave a trace on stdout.
    """
    def __init__(self, url):
        # Deriving from Exception (and filling in args) lets generic
        # "except Exception" handlers and tracebacks see this error.
        Exception.__init__(self, url)
        print('\nERROR accessing PubDB at '+url+'\n')
13
14 # ####################################
class RefDBmapError(Exception):
    """
    Raised when the central RefDB-PubDBs map cannot be reached.

    Constructing the exception also prints an error line naming the URL.
    """
    def __init__(self, url):
        # Deriving from Exception (and filling in args) lets generic
        # "except Exception" handlers and tracebacks see this error.
        Exception.__init__(self, url)
        print('\nERROR accessing RefDB-PubDBs map at '+url+'\n')
19
20 # ####################################
class NoPHPError(Exception):
    """
    Raised when the analysis PHP page of a PubDB is not usable.

    Constructing the exception also prints an error line naming the URL.
    """
    def __init__(self, url):
        # Deriving from Exception (and filling in args) lets generic
        # "except Exception" handlers and tracebacks see this error.
        Exception.__init__(self, url)
        # Joining with single spaces reproduces the spacing of the former
        # comma-separated print statement.
        print(' '.join(('ERROR accessing PHP: ', str(url), ' \n')))
26
27 # ####################################
class pubDBResult:
    """Holder for the raw content returned by one PubDB query."""

    def __init__(self, contents):
        # Stored untouched; nothing in this class interprets it.
        self.contents = contents

    def dump(self):
        """Print the stored contents on standard output."""
        print('%s %s' % ('Contents : ', self.contents))
37
38 # ####################################
39 # class to access PubDBs
class PubDB:
    """
    Find the PubDB servers publishing the collections of a dataset/owner
    and download the per-collection information they expose.
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """
        Store the request and prepare the CE black list.

        owner, dataset -- passed to RefDBInfo to look up the collections.
        dataTiers      -- sequence of data tier names requested by the user.
        cfg_params     -- mapping of configuration values; the optional key
                          'USER.ce_black_list' holds a comma-separated list
                          of regular expressions matched against PubDB URLs.
        """
        # Attributes
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        self.RefDBurl_ = 'http://cmsdoc.cern.ch/cms/production/www/'
        self.RefDBphp_ = 'PubDB/GetIdCollection.php'
        self.RefDBMotherphp_ = 'cgi/SQL/CollectionTree.php'

        self.PubDBCentralUrl_ = 'http://cmsdoc.cern.ch/cms/production/www/PubDB/'
        self.PubDBCentralPhp_ = 'GetPublishedCollectionInfoFromRefDB.php'

        self.PubDBAnalysisPhp_ = 'get-pubdb-analysisinfo.php'
        self.PubDBAnalysisPhpOld_ = 'get-pubdb-analysisinfo.php'

        ## link to the modified RefDB-PubDBs map script that allow the display option
        self.RefDBPubDBsmapPhp_ = 'GetPublishedCollectionInfoFromRefDB.mod.php?display=1'

        # Constructor procedures

        try:
            rawBlackList = self.cfg_params['USER.ce_black_list']
        except KeyError:
            # no black list configured
            CEBlackList = []
        else:
            CEBlackList = [item.strip() for item in rawBlackList.split(',')]
            # An empty entry (e.g. from a trailing comma) would compile to
            # a regex matching every URL and so exclude all sites: drop it.
            CEBlackList = [item for item in CEBlackList if item]
        print('%s %s' % ('CEBlackList: ', CEBlackList))
        self.reCEBlackList = [re.compile(bad) for bad in CEBlackList]

    ########################################################################
    def findAllCollections(self):
        """
        Contact RefDB and find the CollID of all the user required collections

        Returns the list of needed collection IDs: the first collection
        reported by RefDB (the primary one) followed by every further
        collection whose data tier is in self.dataTiers.  Returns an empty
        list, after logging an error, if a requested data tier is not among
        the tiers seen.  Exits the process with status 10 if the RefDB
        lookup fails.
        """
        ## download from RefDB all the info about the given dataset-owner
        refdb = RefDBInfo(self.owner, self.dataset)
        try:
            collInfos = refdb.GetRefDBInfo()
        except:
            # Deliberately broad: the errors raised by RefDBInfo are not
            # visible here and may not derive from Exception.
            sys.exit(10)

        NeededCollID = []
        refdbdataTiers = []
        first = 1
        for coll in collInfos:
            refdbdataTiers.append(coll[2])
            if first:
                ## select the primary collection
                NeededCollID.append(coll[0])
                common.logger.message("\n --> primary collection for owner "+self.owner+" is: ID="+coll[0]+" DataTier="+coll[2])
                first = 0
            elif self.dataTiers.count(coll[2]):
                ## select only the parents collections corresponding to data-tiers requested by the user
                NeededCollID.append(coll[0])
                common.logger.message(" --> further collection required: ID="+coll[0]+" DataTier="+coll[2])

        ## check that the user asks for Data Tier really existing in RefDB, otherwise give a warning message
        for dt in self.dataTiers:
            if refdbdataTiers.count(dt) <= 0:
                # Build the text with explicit str() conversions: dt and
                # self.dataTiers are not guaranteed to be strings.
                msg = "ERROR: Data Tier ( =>"+str(dt)+"<= ) not existing for dataset/owner "+self.dataset+"/"+self.owner+"! "
                msg = msg + "Check the data_tier variable in crab.cfg "
                msg = msg + 'Owner Dataset not published with asked dataTiers! '+\
                      self.owner+' '+self.dataset+' '+str(self.dataTiers)
                common.logger.message(msg)
                return []

        return NeededCollID

    ########################################################################
    def findPubDBsbyCollID(self, CollID):
        """
        Find the list of PubDB URLs having a given Collection

        CollID is concatenated into the query URL, so it must be a string.
        Raises RefDBmapError if the RefDB-PubDBs map cannot be opened.
        """
        ### contact the RefDB-PubDBs map to discovery where the given CollID is
        url = self.PubDBCentralUrl_+self.RefDBPubDBsmapPhp_+'&CollID=' + CollID
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise RefDBmapError(url)

        ### search for the PubDBURL string
        reURLLine = re.compile(r'PubDBURL=(\S*)')

        PubDBURLs = []
        try:
            for line in f.readlines():
                found = reURLLine.search(line)
                if found:
                    # Keeps only the text between the first and the second
                    # '=' of the matched 'PubDBURL=...' token.
                    PubDBURLs.append(found.group().split('=')[1])
        finally:
            # release the connection even if reading fails
            f.close()

        ### return the list of PubDBURL where the collection is present
        return self.uniquelist(PubDBURLs)

    ################################################################
    def findPubDBs(self, CollIDs):
        """
        Find the list of PubDB URLs having ALL the required collections

        The result is further filtered through checkBlackList().
        """
        ### prepare a list all PubDB urls for all collections
        allurls = []
        countColl = 0
        for CollID in CollIDs:
            countColl = countColl + 1
            allurls.extend(self.findPubDBsbyCollID(CollID))

        ### select only PubDB urls that contains all the collections:
        ### each per-collection list is duplicate-free, so a URL publishing
        ### every collection occurs exactly countColl times
        SelectedPubDBURLs = [url for url in self.uniquelist(allurls)
                             if allurls.count(url) == countColl]

        #### check based on CE black list: select only PubDB not in the CE black list
        return self.checkBlackList(SelectedPubDBURLs)

    #######################################################################
    def uniquelist(self, old):
        """
        remove duplicates from a list

        Elements must be hashable; the order of the result is the
        iteration order of a dict keyed by the elements.
        """
        nd = {}
        for e in old:
            nd[e] = 0
        return list(nd)

    #######################################################################
    def checkBlackList(self, pubDBUrls):
        """
        select PubDB URLs that are at site not excluded by the user (via CE black list)

        Returns the URLs matched by none of the patterns in
        self.reCEBlackList, in their original order.
        """
        goodurls = []
        for url in pubDBUrls:
            print('%s %s' % ('connecting to the URL ', url))
            good = 1
            # no early exit: one message is logged per matching pattern
            for badCE in self.reCEBlackList:
                if badCE.search(url):
                    common.logger.message('CE in black list, skipping PubDB URL '+url)
                    good = 0
            if good:
                goodurls.append(url)
        if not goodurls:
            common.logger.debug(3, "No selected PubDB URLs")
        return goodurls

    ########################################################################
    def getPubDBData(self, CollIDs, url):
        """
        Contact a PubDB to collect all the relevant information

        For each collection ID the analysis PHP page located next to url
        (same path up to the last '/') is downloaded.  Returns one
        pubDBResult per collection ID, in order.

        Raises NoPHPError if the page URL contains 'V24' or the server
        answers with an HTTP error, PubDBError for any other URL error.
        """
        # Both values are the same for every collection.
        reOld = re.compile(r'V24')
        baseurl = url[:url.rfind('/')+1]

        result = []
        for CollID in CollIDs:
            urlphp = baseurl+self.PubDBAnalysisPhp_+'?CollID='+CollID

            if reOld.search(urlphp):
                raise NoPHPError(urlphp)

            try:
                f = urllib2.urlopen(urlphp)
            except urllib2.HTTPError:
                # HTTPError is a subclass of URLError, so it has to be
                # tested first or this branch can never be taken.
                # sys.exc_info() works on every Python 2 release.
                print('%s %s' % ("WARNING: ", sys.exc_info()[1]))
                raise NoPHPError(urlphp)
            except urllib2.URLError:
                print('%s %s' % ("WARNING: ", sys.exc_info()[1]))
                raise PubDBError(urlphp)

            try:
                content = f.read()
            finally:
                # release the connection even if reading fails
                f.close()
            result.append(pubDBResult(content))

        return result

    ########################################################################
    def getAllPubDBData(self, CollIDs, urllist):
        """
        Contact a list of PubDB to collect all the relevant information

        Returns a list with, for each URL of urllist in order, the list
        returned by getPubDBData() for that URL.
        """
        return [self.getPubDBData(CollIDs, pubdburl) for pubdburl in urllist]
270 ####################################################################