10 |
|
def __init__(self, url): |
11 |
|
print '\nERROR accessing PubDB at '+url+'\n' |
12 |
|
pass |
13 |
< |
|
13 |
> |
|
14 |
> |
# #################################### |
15 |
> |
class PubDBGetAnalysisError: |
16 |
> |
def __init__(self, url,Collections): |
17 |
> |
print '\nERROR extracting info for collections '+Collections+' from PubDB '+url+'.\n' |
18 |
> |
pass |
19 |
> |
|
20 |
|
# #################################### |
21 |
|
class RefDBmapError: |
22 |
|
def __init__(self, url): |
50 |
|
self.owner = owner |
51 |
|
self.dataset = dataset |
52 |
|
self.dataTiers = dataTiers |
53 |
+ |
self.NeededdataTiers=[] |
54 |
|
self.cfg_params = cfg_params |
55 |
|
|
56 |
|
self.RefDBurl_ = 'http://cmsdoc.cern.ch/cms/production/www/' |
70 |
|
|
71 |
|
CEBlackList = [] |
72 |
|
try: |
73 |
< |
tmpBad = string.split(self.cfg_params['USER.ce_black_list'],',') |
73 |
> |
tmpBad = string.split(self.cfg_params['EDG.ce_black_list'],',') |
74 |
|
#tmpBad = ['fnal'] |
75 |
|
for tmp in tmpBad: |
76 |
|
tmp=string.strip(tmp) |
77 |
+ |
if (tmp == 'cnaf'): tmp = 'webserver' ########## warning: temp. patch |
78 |
|
CEBlackList.append(tmp) |
79 |
|
except KeyError: |
80 |
|
pass |
81 |
< |
print 'CEBlackList: ',CEBlackList |
81 |
> |
common.logger.debug(5,'CEBlackList: '+str(CEBlackList)) |
82 |
|
self.reCEBlackList=[] |
83 |
|
for bad in CEBlackList: |
84 |
|
self.reCEBlackList.append(re.compile( bad )) |
106 |
|
## select the primary collection |
107 |
|
if first: |
108 |
|
NeededCollID.append(coll[0]) |
109 |
+ |
self.NeededdataTiers.append(coll[2]) |
110 |
|
refdbdataTiers.append(coll[2]) |
111 |
|
common.logger.message("\n --> primary collection for owner "+self.owner+" is: ID="+coll[0]+" DataTier="+coll[2]) |
112 |
|
first=0 |
114 |
|
## select only the parents collections corresponding to data-tiers requested by the user |
115 |
|
if self.dataTiers.count(coll[2]): |
116 |
|
NeededCollID.append(coll[0]) |
117 |
+ |
self.NeededdataTiers.append(coll[2]) |
118 |
|
common.logger.message(" --> further collection required: ID="+coll[0]+" DataTier="+coll[2]) |
119 |
|
refdbdataTiers.append(coll[2]) |
120 |
|
|
121 |
|
## check that the user asks for Data Tier really existing in RefDB, otherwise give a warning message |
122 |
|
for dt in self.dataTiers: |
123 |
|
if refdbdataTiers.count(dt)<=0: |
124 |
< |
msg = "ERROR: Data Tier ( =>",dt,"<= ) not existing for dataset/owner "+ self.dataset+"/"+self.owner+"!" |
125 |
< |
msg = msg + "Check the data_tier variable in crab.cfg" |
116 |
< |
msg = msg + 'Owner Dataset not published with asked dataTiers! '+\ |
124 |
> |
msg = "ERROR: Data Tier ( =>",dt,"<= ) not existing for dataset/owner "+self.dataset+"/"+self.owner+"! " |
125 |
> |
msg = str(msg) + 'Owner Dataset not published with asked dataTiers! '+\ |
126 |
|
self.owner+' '+ self.dataset+' '+self.dataTiers |
127 |
< |
common.logger.message(msg) |
127 |
> |
msg = str(msg) + ' Check the data_tier variable in crab.cfg !\n' |
128 |
> |
common.logger.message(msg) |
129 |
|
return [] |
130 |
|
|
131 |
|
#print 'Needed Collections are ', NeededCollID |
189 |
|
SelectedPubDBURLs.append(url) |
190 |
|
|
191 |
|
#print 'Required Collections',CollIDs,'are all present in PubDBURLs : ',SelectedPubDBURLs,'\n' |
182 |
– |
#return SelectedPubDBURLs |
192 |
|
#### check based on CE black list: select only PubDB not in the CE black list |
193 |
|
GoodPubDBURLs=self.checkBlackList(SelectedPubDBURLs) |
194 |
|
return GoodPubDBURLs |
206 |
|
####################################################################### |
207 |
|
def checkBlackList(self, pubDBUrls): |
208 |
|
""" |
209 |
< |
select PubDB URLs that are at site not excluded by the user (via CE black list) |
209 |
> |
select PubDB URLs that are at site not excluded by the user (via CE black list)
210 |
|
""" |
211 |
|
goodurls = [] |
212 |
|
for url in pubDBUrls: |
213 |
< |
print 'connecting to the URL ',url |
213 |
> |
common.logger.debug(10,'connecting to the URL '+url) |
214 |
|
good=1 |
215 |
|
for re in self.reCEBlackList: |
216 |
|
if re.search(url): |
221 |
|
if len(goodurls) == 0: |
222 |
|
common.logger.debug(3,"No selected PubDB URLs") |
223 |
|
return goodurls |
224 |
< |
|
224 |
> |
|
225 |
> |
######################################################################## |
226 |
> |
def checkPubDBNewVersion(self, baseurl):
    """
    Check PubDB version to find out if it's new-style or old-style.

    The check is based on the existence of the pubdb-get-version.php
    script: a PubDB where that URL can be opened is considered new-style.

    baseurl -- PubDB base URL; the script name is appended to it directly,
               without adding any separator

    Returns 1 for a new-style PubDB, 0 when the version script cannot
    be opened (urllib2.URLError).
    """
    urlversion = baseurl+'pubdb-get-version.php'
    try:
        v = urllib2.urlopen(urlversion)
    except urllib2.URLError:
        # no version script available at this site: old-style PubDB
        return 0
    # Only the reachability of the script matters; the schema version it
    # reports is not used, so release the connection instead of leaking it.
    v.close()
    return 1
244 |
> |
|
245 |
|
######################################################################## |
246 |
< |
def getPubDBData(self, CollIDs, url): |
246 |
> |
def getPubDBData(self, CollIDs, url , newversion): |
247 |
|
""" |
248 |
|
Contact a PubDB to collect all the relevant information |
249 |
|
""" |
250 |
|
result = [] |
251 |
< |
for CollID in CollIDs: |
252 |
< |
end=string.rfind(url,'/') |
253 |
< |
lastEq=string.rfind(url,'=') |
251 |
> |
|
252 |
> |
### get the base PubDb url |
253 |
> |
end=string.rfind(url,'/') |
254 |
> |
lastEq=string.rfind(url,'=') |
255 |
> |
|
256 |
> |
if (newversion) : |
257 |
> |
### from PubDB V4 : get info for all the collections in one shot and unserialize the content |
258 |
> |
Collections=string.join(CollIDs,'-') |
259 |
> |
## add the PU among the required Collections if the Digi are requested |
260 |
> |
# ( for the time being asking it directly to the PubDB so the RefDB |
261 |
> |
# level data discovery is bypassed..... in future when every site |
262 |
> |
# will have the new style it will be possible to ask for PU , at RefDB level, in method findAllCollections ) |
263 |
> |
if ( self.NeededdataTiers.count('Digi') ): |
264 |
> |
PUCollID=self.getDatatierCollID(url[:end+1],Collections,"PU") |
265 |
> |
if (PUCollID) : CollIDs.append(PUCollID) |
266 |
> |
## |
267 |
> |
Collections=string.join(CollIDs,'-') |
268 |
> |
### download from PubDB all the info about the given collections |
269 |
> |
pubdb_analysis=PubDBInfo(url[:end+1],Collections) |
270 |
> |
#print pubdb_analysis.GetPubDBInfo() |
271 |
> |
ok=0 |
272 |
> |
try: |
273 |
> |
catInfos=pubdb_analysis.GetPubDBInfo() |
274 |
> |
ok=1 |
275 |
> |
except : |
276 |
> |
#print "WARNING: can't get PubDB content out of "+url[:end+1]+"\n" |
277 |
> |
print '\nERROR extracting info for collections '+Collections+' from PubDB '+url[:end+1]+'. The PU might not be published at that site.\n' |
278 |
> |
#raise PubDBGetAnalysisError(url[:end+1],Collections) |
279 |
> |
if (ok): result=catInfos; |
280 |
> |
|
281 |
> |
else: |
282 |
> |
|
283 |
> |
### before PubDB V4 : get info for each collection and read the key-value pair text |
284 |
> |
|
285 |
> |
for CollID in CollIDs: |
286 |
|
urlphp=url[:end+1]+self.PubDBAnalysisPhp_+'?CollID='+CollID |
287 |
|
# print 'PHP URL: '+urlphp+' \n' |
288 |
< |
|
288 |
> |
|
289 |
|
reOld=re.compile( r'V24' ) |
290 |
|
#print urlphp,'Old PubDB ',reOld.search(urlphp) |
291 |
|
if reOld.search(urlphp): |
310 |
|
# r.dump() |
311 |
|
#print '.....' |
312 |
|
return result |
313 |
< |
|
313 |
> |
|
314 |
> |
######################################################################## |
315 |
> |
def getDatatierCollID(self,urlbase,CollIDString,datatier):
    """
    Contact a PubDB script to retrieve the collection ID of a DataTier.

    urlbase      -- PubDB base URL; the script name is appended to it directly
    CollIDString -- collection ID string sent as the 'collid' query parameter
    datatier     -- data tier name sent as the 'datatier' query parameter

    Returns the text following 'collid=' in the script reply, or None
    when the reply contains no 'collid=' entry.
    Raises PubDBGetAnalysisError when the script cannot be contacted.
    """
    script = urlbase+'pubdb-get-collidbydatatier.php'
    try:
        f = urllib.urlopen(script+'?collid='+CollIDString+"&datatier="+datatier)
    except IOError:
        # report the script that failed; only urlbase is known in this method
        raise PubDBGetAnalysisError(script,CollIDString)
    try:
        data = f.read()
    finally:
        f.close()

    colldata = re.compile(r'collid=(\S*)').search(data)
    if not colldata:
        # nothing published for this data tier: give the caller a false value
        return None

    datatier_CollID = colldata.group(1)
    common.logger.message(" --> asking to PubDB "+urlbase+" for an additional collection : ID= "+datatier_CollID+" DataTier= "+datatier)
    return datatier_CollID
331 |
> |
|
332 |
|
######################################################################## |
333 |
< |
def getAllPubDBData(self, CollIDs, urllist): |
333 |
> |
def getAllPubDBData(self):
    """
    Contact a list of PubDB to collect all the relevant information.

    The required collection IDs are obtained from findAllCollections()
    and the PubDB URLs publishing them from findPubDBs().  Every URL is
    probed with checkPubDBNewVersion() and then queried with
    getPubDBData().

    Returns a dictionary with the keys 'newPubDB' and 'oldPubDB', each
    holding the list of non-empty results obtained from the PubDBs of
    that style.
    """
    newPubDBResult=[]
    oldPubDBResult=[]

    ### find the user-required collection IDs
    CollIDs = self.findAllCollections()
    ### find the PubDB URLs publishing the needed data
    urllist = self.findPubDBs(CollIDs)
    ### collect information separately from new-style PubDBs and old-style PubDBs
    for pubdburl in urllist:
        end=pubdburl.rfind('/')
        newversion=self.checkPubDBNewVersion(pubdburl[:end+1])
        # getPubDBData may append extra collection IDs to the list it is
        # given; pass a copy so that IDs added while querying one site
        # do not leak into the queries sent to the following sites.
        res=self.getPubDBData(list(CollIDs),pubdburl,newversion)
        if len(res)>0:
            if newversion:
                newPubDBResult.append(res)
            else:
                oldPubDBResult.append(res)

    ### fill a dictionary with all the PubDBs results both old-style and new-style
    Result={}
    Result['newPubDB']=newPubDBResult
    Result['oldPubDB']=oldPubDBResult
    return Result
374 |
> |
|
375 |
|
#################################################################### |