2 |
|
import sys, os, string, re |
3 |
|
import xml.sax |
4 |
|
import urllib |
5 |
< |
import DBSXMLHandler |
5 |
> |
|
6 |
> |
import sys |
7 |
> |
sys.path.append('./DBSAPI') |
8 |
> |
import dbsCgiApi |
9 |
|
|
10 |
|
class DBSError: |
11 |
|
def __init__(self, owner, dataset): |
26 |
|
self.owner = owner |
27 |
|
self.dataset = dataset |
28 |
|
self.dataTiers = dataTiers |
29 |
< |
## DBSXMLDump |
30 |
< |
self.DBSclient_ = '/phedex/SL/PHEDEX_head/Utilities/DBSXMLDump -from DBS ' |
31 |
< |
self.DBSParam_ = '~/.globus/DBS_DBParam:Production/Admin' |
29 |
< |
self.DBSDumpbase_ = self.DBSclient_+" -from DBS -db "+self.DBSParam_ |
30 |
< |
## HTTP/CGI |
31 |
< |
self.DBSURL_='http://cern.ch/cms-dbs/cgi-bin/dbsxml?api=' |
29 |
> |
self.dbspath=dataset+'/datatier/'+owner |
30 |
> |
|
31 |
> |
self.api = dbsCgiApi.DbsCgiApi(cgiUrl="http://cern.ch/cms-dbs/cgi-bin") |
32 |
|
|
33 |
|
# #################################### |
34 |
|
def getDatasetProvenance(self): |
35 |
|
""" |
36 |
|
query DBS to get provenance |
37 |
|
""" |
38 |
< |
## DBSXMLDump |
39 |
< |
xmlfile= 'parent.xml' |
40 |
< |
ListdataTiers=string.join(self.dataTiers,',') |
41 |
< |
#print ListdataTiers |
42 |
< |
print "DBSInfo: ---> getDatasetProvenance : "+self.owner+"/"+self.dataset+" with datatier "+ListdataTiers |
43 |
< |
cmd = self.DBSDumpbase_+" -to "+xmlfile+" -datatier "+ListdataTiers+" -getDatasetProvenance \'"+self.owner+"/"+self.dataset+"\'" |
44 |
< |
#print "DBSInfo: executing "+cmd |
45 |
< |
os.system(cmd) |
46 |
< |
|
47 |
< |
### parse the XML |
48 |
< |
handler = self.XMLparsing(xmlfile) |
49 |
< |
os.system('rm '+xmlfile) |
50 |
< |
|
51 |
< |
print "DBSInfo: parents are %s"%handler.parentsList |
52 |
< |
|
53 |
< |
for aparent in handler.parent.keys(): |
54 |
< |
print "DBSInfo: parent is "+aparent+" type: "+handler.parent[aparent] |
55 |
< |
|
56 |
< |
## return a map with parent path and type : should I return just a list with parent paths? |
57 |
< |
return handler.parent |
38 |
> |
datasetParentList = self.api.getDatasetProvenance(self.dbspath,self.dataTiers) |
39 |
> |
|
40 |
> |
parent = {} |
41 |
> |
for aparent in datasetParentList: |
42 |
> |
print "DBSInfo: parent path is "+aparent.getDatasetPath()+" datatier is: "+aparent.getDataTier() |
43 |
> |
parent[aparent.getDatasetPath()]=aparent.getDataTier() |
44 |
|
|
45 |
+ |
return parent |
46 |
|
|
47 |
|
# #################################### |
48 |
|
def getDatasetContents(self): |
49 |
|
""" |
50 |
|
query DBS to get event collections |
51 |
|
""" |
52 |
< |
print "DBSInfo: ---> getDatasetContents : "+self.dataset+"/"+self.owner |
53 |
< |
try: |
67 |
< |
url=self.DBSURL_+'getDatasetContents&path='+self.dataset+'/datatier/'+self.owner |
68 |
< |
f = urllib.urlopen(url) |
69 |
< |
except: |
70 |
< |
raise DBSInfoError(url) |
71 |
< |
|
72 |
< |
data = f.read() |
73 |
< |
xmlfile = "evc.xml" |
74 |
< |
file = open(xmlfile, 'w') |
75 |
< |
file.write(data) |
76 |
< |
file.close() |
77 |
< |
|
78 |
< |
### parse the XML |
79 |
< |
handler = self.XMLparsing(xmlfile) |
80 |
< |
os.system('rm '+xmlfile) |
52 |
> |
|
53 |
> |
fileBlockList = self.api.getDatasetContents(self.dbspath) |
54 |
|
|
55 |
|
## get the fileblock and event collections |
56 |
|
nevtsbyblock= {} |
57 |
< |
print "DBSInfo: fileblocks are: %s"%handler.fileblocksList |
57 |
> |
for fileBlock in fileBlockList: |
58 |
> |
## get the event collections for each block |
59 |
> |
#print fileBlock.getBlockName() |
60 |
> |
#print fileBlock.getBlockId() |
61 |
> |
eventCollectionList = fileBlock.getEventCollectionList() |
62 |
> |
nevts=0 |
63 |
> |
for eventCollection in eventCollectionList: |
64 |
> |
#print "DBSInfo: evc: "+eventCollection.getCollectionName()+" nevts: %i"%eventCollection.getNumberOfEvents() |
65 |
> |
nevts=nevts+eventCollection.getNumberOfEvents() |
66 |
> |
print "DBSInfo: total nevts %i in block %s "%(nevts,fileBlock.getBlockName()) |
67 |
> |
nevtsbyblock[fileBlock.getBlockName()]=nevts |
68 |
|
|
86 |
– |
for block in handler.fileblocksList: |
87 |
– |
print "DBSInfo: --- block: "+block |
88 |
– |
## get the event collections for each block |
89 |
– |
evcnames=handler.evcbyfileblock[block].keys() |
90 |
– |
evcnames.sort() |
91 |
– |
nevts=0 |
92 |
– |
for evcname in evcnames: |
93 |
– |
print "DBSInfo: evc: "+evcname+" nevts: "+handler.evcbyfileblock[block][evcname] |
94 |
– |
nevts=nevts+int(handler.evcoll[evcname]) |
95 |
– |
print "DBSInfo: total nevts %i in block %s "%(nevts,block) |
96 |
– |
nevtsbyblock[block]=nevts |
97 |
– |
|
98 |
– |
print "DBSInfo: total number of events %i"%handler.totnevts |
69 |
|
# returning a map of fileblock-nevts will be enough for now |
70 |
|
# TODO: in future the EvC collections grouped by fileblock should be returned |
71 |
< |
|
71 |
> |
|
72 |
|
return nevtsbyblock |
73 |
|
|
104 |
– |
|
105 |
– |
# #################################### |
106 |
– |
def XMLparsing(self, xmlfile): |
107 |
– |
""" |
108 |
– |
parse XML |
109 |
– |
""" |
110 |
– |
#print "\n DBSInfo: Parsing XML file "+xmlfile+"\n" |
111 |
– |
parser = xml.sax.make_parser() |
112 |
– |
handler = DBSXMLHandler.Handler() |
113 |
– |
parser.setContentHandler(handler) |
114 |
– |
parser.parse(xmlfile) |
115 |
– |
return handler |
116 |
– |
|