1 |
#!/usr/bin/env python
|
2 |
import sys, os, string, re
|
3 |
import xml.sax
|
4 |
import urllib
|
5 |
import DBSXMLHandler
|
6 |
|
7 |
class DBSError(Exception):
    """Error raised when DBS cannot be accessed for an owner/dataset pair.

    Deriving from ``Exception`` makes the class a real exception: it can be
    raised on every Python version and is caught by ``except Exception``.

    Attributes:
        owner   -- the owner name passed to the constructor
        dataset -- the dataset name passed to the constructor
    """

    def __init__(self, owner, dataset):
        """Record the failing owner/dataset and report the error on stdout.

        owner   -- owner name (string)
        dataset -- dataset name (string)
        """
        self.owner = owner
        self.dataset = dataset
        message = 'ERROR accessing DBS for Owner/Dataset: ' + owner + '/' + dataset
        Exception.__init__(self, message)
        # Printing at construction time is kept for backward compatibility:
        # existing callers get the same stdout output as before.
        print('\n' + message + '\n')
|
11 |
|
12 |
class DBSInfoError(Exception):
    """Error raised when a DBS URL cannot be accessed.

    Deriving from ``Exception`` makes the class a real exception: it can be
    raised on every Python version and is caught by ``except Exception``.

    Attributes:
        url -- the URL passed to the constructor
    """

    def __init__(self, url):
        """Record the failing URL and report the error on stdout.

        url -- the DBS URL that could not be opened (string)
        """
        self.url = url
        message = 'ERROR accessing DBS url : ' + url
        Exception.__init__(self, message)
        # Printing at construction time is kept for backward compatibility:
        # existing callers get the same stdout output as before.
        print('\n' + message + '\n')
|
16 |
|
17 |
##################################################################################
|
18 |
# Class to extract info from DBS
|
19 |
###############################################################################
|
20 |
|
21 |
class DBSInfo:
    """Extract information from DBS for one owner/dataset pair.

    Two access paths are used: the ``DBSXMLDump`` command-line client
    (provenance) and the DBS HTTP/CGI interface (dataset contents).  Both
    produce XML which is parsed with ``DBSXMLHandler.Handler``.
    """

    def __init__(self, owner, dataset, dataTiers):
        """Store the query parameters and build the DBS access strings.

        owner     -- owner name (string)
        dataset   -- dataset name (string)
        dataTiers -- sequence of data tier names; joined with ',' when the
                     DBSXMLDump command line is built
        """
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers
        ## DBSXMLDump
        self.DBSclient_ = '/phedex/SL/PHEDEX_head/Utilities/DBSXMLDump -from DBS '
        self.DBSParam_ = '~/.globus/DBS_DBParam:Production/Admin'
        # NOTE(review): "-from DBS" is already part of DBSclient_, so it ends
        # up twice on the command line.  Left as-is because the attribute
        # values are visible to callers -- confirm whether one can be dropped.
        self.DBSDumpbase_ = self.DBSclient_ + " -from DBS -db " + self.DBSParam_
        ## HTTP/CGI
        self.DBSURL_ = 'http://cern.ch/cms-dbs/cgi-bin/dbsxml?api='

    # ####################################
    def getDatasetProvenance(self):
        """
        query DBS to get provenance

        Runs DBSXMLDump with ``-getDatasetProvenance`` for owner/dataset,
        writing the result to a temporary ``parent.xml`` in the current
        directory, parses it and returns ``handler.parent`` (per the original
        author's note: a map of parent path to type).

        Raises DBSError if the DBSXMLDump command exits with a non-zero
        status.
        """
        ## DBSXMLDump
        xmlfile = 'parent.xml'
        ListdataTiers = ','.join(self.dataTiers)
        print("DBSInfo: ---> getDatasetProvenance : " + self.owner + "/" + self.dataset + " with datatier " + ListdataTiers)
        cmd = (self.DBSDumpbase_ + " -to " + xmlfile + " -datatier " + ListdataTiers
               + " -getDatasetProvenance '" + self.owner + "/" + self.dataset + "'")

        try:
            # A failed dump would otherwise fall through to parsing a missing
            # (or stale) parent.xml, so check the exit status explicitly.
            status = os.system(cmd)
            if status != 0:
                raise DBSError(self.owner, self.dataset)
            ### parse the XML
            handler = self.XMLparsing(xmlfile)
        finally:
            # Always clean up the temporary file, even when parsing fails.
            self._removeFile(xmlfile)

        print("DBSInfo: parents are %s" % handler.parentsList)

        for aparent in handler.parent.keys():
            print("DBSInfo: parent is " + aparent + " type: " + handler.parent[aparent])

        ## return a map with parent path and type : should I return just a list with parent paths?
        return handler.parent

    # ####################################
    def getDatasetContents(self):
        """
        query DBS to get event collections

        Fetches the ``getDatasetContents`` XML over HTTP, stores it in a
        temporary ``evc.xml`` in the current directory, parses it and returns
        a dictionary mapping each fileblock name to the summed number of
        events of its event collections.

        Raises DBSInfoError if the URL cannot be opened.
        """
        print("DBSInfo: ---> getDatasetContents : " + self.dataset + "/" + self.owner)
        # Build the URL before the try block so it is always defined when the
        # error is reported.
        url = self.DBSURL_ + 'getDatasetContents&path=' + self.dataset + '/datatier/' + self.owner
        try:
            f = urllib.urlopen(url)
        except Exception:
            raise DBSInfoError(url)

        try:
            data = f.read()
        finally:
            f.close()

        xmlfile = "evc.xml"
        try:
            out = open(xmlfile, 'w')
            try:
                out.write(data)
            finally:
                out.close()
            ### parse the XML
            handler = self.XMLparsing(xmlfile)
        finally:
            # Always clean up the temporary file, even when parsing fails.
            self._removeFile(xmlfile)

        ## get the fileblock and event collections
        nevtsbyblock = {}
        print("DBSInfo: fileblocks are: %s" % handler.fileblocksList)

        for block in handler.fileblocksList:
            print("DBSInfo: --- block: " + block)
            ## get the event collections for each block
            evcs = handler.evcbyfileblock[block]
            nevts = 0
            for evcname in sorted(evcs.keys()):
                # NOTE(review): the count printed comes from evcbyfileblock
                # while the count summed comes from evcoll -- presumably both
                # hold the same value; confirm against DBSXMLHandler.
                print("DBSInfo: evc: " + evcname + " nevts: " + evcs[evcname])
                nevts = nevts + int(handler.evcoll[evcname])
            print("DBSInfo: total nevts %i in block %s " % (nevts, block))
            nevtsbyblock[block] = nevts

        print("DBSInfo: total number of events %i" % handler.totnevts)
        # returning a map of fileblock-nevts will be enough for now
        # TODO: in future the EvC collections grouped by fileblock should be returned

        return nevtsbyblock

    # ####################################
    def XMLparsing(self, xmlfile):
        """
        parse XML

        Parses ``xmlfile`` with a SAX parser using a fresh
        ``DBSXMLHandler.Handler`` as content handler and returns that handler.
        """
        parser = xml.sax.make_parser()
        handler = DBSXMLHandler.Handler()
        parser.setContentHandler(handler)
        parser.parse(xmlfile)
        return handler

    # ####################################
    def _removeFile(self, path):
        """Delete ``path`` if it exists (replaces shelling out to ``rm``)."""
        if os.path.exists(path):
            os.remove(path)
|
116 |
|