1 |
from xml.dom import minidom
|
2 |
from xml.dom.minidom import parseString
|
3 |
from optparse import OptionParser
|
4 |
from itertools import groupby
|
5 |
import re, os, sys, commands, operator
|
6 |
|
7 |
# Labels returned by get_source_type() for the kinds of source a descriptor
# entry can point to.
CASTOR = "castor source"
LOCAL = "local source"
DQM_SERVER = "dqm gui server source"

# Prefixes placed in front of the messages written to stderr.
FATAL_ERROR = "Fatal error: "
ERROR = "Error: "

# Help texts handed to OptionParser in get_options().
DESCRIPTION = (
    "%prog is a simple interface to parse an XML ROOT file descriptor "
    "(DQM Reference Histogram format compliant) and download ROOT files by creating optimized queries "
    "for the DQM GUI dqm-access tool."
)
EPILOG = (
    "Before using source the DQM GUI environment variables: "
    "source $[YOUR_DQM_GUI_INSTALLATION]/current/apps/dqmgui/etc/profile.d/env.sh "
    "and create a valid proxy by providing your Grid credentials: "
    "source /afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.sh && voms-proxy-init"
)

# Parsed command-line options; filled in by the script body via get_options().
options = None
# Text accumulated by get_xmlvalidation_error() while libxml2 validates.
error_msg = ''
|
21 |
|
22 |
#-------------------------------------------------------------------------------
|
23 |
def get_attribute_by_name(elelist, name):
    """
    Looks up the attribute called `name` on the given element.

    `elelist` must expose an `attributes` mapping whose entries carry a
    `value` (the script passes xml.dom.minidom elements).

    Returns the attribute value, or None when the lookup raises KeyError,
    i.e. the element has no attribute with that name.
    """

    try:
        found = elelist.attributes[name].value
    except KeyError:
        return None
    return found
|
34 |
|
35 |
#-------------------------------------------------------------------------------
|
36 |
def get_elements_by_tag_name(xmldoc, tag):
    """
    Collects the elements named `tag` found under `xmldoc`.

    Returns whatever `xmldoc.getElementsByTagName(tag)` yields when it is
    non-empty. When nothing matches, a fatal error is written to stderr and
    the program exits with status 1.
    """

    found = xmldoc.getElementsByTagName(tag)
    if found:
        return found

    message = "%s XML descriptor does NOT have a tag named: %s" % (FATAL_ERROR, tag)
    sys.stderr.write(message + "\n")
    sys.exit(1)
|
46 |
|
47 |
#-------------------------------------------------------------------------------
|
48 |
def get_dqm_access_header():
    """
    Builds the leading part of every dqm-access command line: the program
    name followed by its fixed flags.
    """

    program = "dqm-access"
    flags = ("-c", "-w", "-l", "-r")
    return " ".join((program,) + flags)
|
54 |
|
55 |
#-------------------------------------------------------------------------------
|
56 |
def get_source_type(source):
    """
    Classifies a source string by its prefix.

    Returns CASTOR for sources starting with "rfio", LOCAL for sources
    starting with "DQM", and DQM_SERVER for everything else.
    """

    # Checked in order; the first matching prefix decides the type.
    known_prefixes = (
        ("rfio", CASTOR),
        ("DQM", LOCAL),
    )
    for prefix, source_type in known_prefixes:
        if source.startswith(prefix):
            return source_type
    return DQM_SERVER
|
67 |
|
68 |
#-------------------------------------------------------------------------------
|
69 |
def get_local_source(source):
    """
    Derives the local ROOT file name for a given source.

    When the source contains "/Reference/", everything after it is returned
    as is. Otherwise the source is read as "<run>/<main dataset>/<dataset>/..."
    (a single leading "/" is ignored) and turned into
    "DQM_V0001_R000<run>__<main dataset>__<dataset>__DQM.root".
    """

    reference = re.search('/Reference/(.*)', source)
    if reference:
        return reference.group(1)

    if source[0] == '/':
        # Drop the leading slash so the first path component is the run.
        source = str(source).split("/", 1)[1]
    parts = str(source).split("/")
    return "DQM_V0001_R000" + "__".join((parts[0], parts[1], parts[2])) + "__DQM.root"
|
81 |
|
82 |
#-------------------------------------------------------------------------------
|
83 |
def get_source_values(source):
    """
    Extracts the run, main dataset and dataset values from a source.

    The source is first converted with get_local_source(); the run is the
    six digits that precede the first "__" matched, and the two dataset
    values are the first two "__"-separated fields that follow it.

    Returns a (run, main dataset, dataset) tuple.
    """

    local_name = get_local_source(source)
    found = re.search(r'(\d{6})__(.*)__(.*)', local_name)
    dataset_fields = str(found.group(2)).split('__')
    return (found.group(1), dataset_fields[0], dataset_fields[1])
|
91 |
|
92 |
#-------------------------------------------------------------------------------
|
93 |
def get_dqm_access_expression(source):
    """
    Builds the dqm-access "-e" expression that selects the given source.

    Returns an (expression, source) tuple; the source is handed back
    unchanged so callers can keep both together.
    """

    run, mdataset, dataset = get_source_values(source)
    template = " -e 'run == %s and match(\"/%s/%s/DQM\", dataset)'"
    return (template % (run, mdataset, dataset), source)
|
101 |
|
102 |
#-------------------------------------------------------------------------------
|
103 |
def get_dqm_access_filters(name, reflist):
    """
    Builds the dqm-access "-f" filters for the given reference entities.

    For every entry of `reflist` the path "<name>/<entity name>" is formed
    and its last component is wrapped in wildcards ("*<last>*"); a last
    component equal to ".*" becomes "***". Only the final component is
    rewritten, so a folder name that also occurs earlier in the path (e.g.
    "Pixel/Pixel") no longer corrupts the filter.

    name    -- subdetector name used as the path prefix.
    reflist -- iterable of elements exposing attributes["name"].value
               (the script passes a minidom NodeList).

    Returns all filters concatenated into one string ("" for an empty list).
    """

    filopt = " -f '/"
    filters = []
    for ref in reflist:
        fil = name + "/" + str(ref.attributes["name"].value)
        lastfolder = str(fil.split("/")[-1])
        # Keep everything up to and including the final "/" untouched.
        parent = str(fil)[:len(fil) - len(lastfolder)]
        if lastfolder == ".*":
            pattern = "***'"
        else:
            pattern = "*" + lastfolder + "*'"
        filters.append(filopt + parent + pattern)
    return "".join(filters)
|
119 |
|
120 |
#-------------------------------------------------------------------------------
|
121 |
def get_command_list(exelist):
    """
    Builds one dqm-access command per distinct (expression, source) pair.

    `exelist` holds ((expression, source), filters) tuples. Entries sharing
    the same (expression, source) are merged so that all their filters end
    up in a single command.

    Returns a list of (command, source) tuples.
    """

    # groupby only merges neighbours, hence the sort on the same key.
    grouped = {}
    for expsrc, members in groupby(sorted(exelist), key=operator.itemgetter(0)):
        grouped[expsrc] = [member[1] for member in members]

    prefix = get_dqm_access_header() + get_dqm_access_server()
    comlist = []
    for (exp, src), filters in grouped.items():
        comlist.append((prefix + exp + "".join(filters), src))
    return comlist
|
137 |
|
138 |
#-------------------------------------------------------------------------------
|
139 |
def get_dqm_access_server():
    """
    Builds the dqm-access "-s" parameter that names the server to query.
    """

    server_url = "https://cmsweb.cern.ch/dqm/offline/data/json"
    return " -s " + server_url
|
145 |
|
146 |
#-------------------------------------------------------------------------------
|
147 |
def update_xml(filename, filebody):
    """
    Overwrites an existing file with the given body.

    filename -- path of the file to rewrite; it must already exist.
    filebody -- new content of the file.

    When `filename` is not an existing file, a fatal error is written to
    stderr and the program exits with status 1.
    """

    if not os.path.isfile(filename):
        message = "%s File %s does NOT exist" % (FATAL_ERROR, filename)
        sys.stderr.write(message + "\n")
        sys.exit(1)

    nfile = open(filename, 'w')
    try:
        nfile.write(filebody)
    finally:
        # Release the handle even when the write fails.
        nfile.close()
|
159 |
|
160 |
#-------------------------------------------------------------------------------
|
161 |
def validate_xml(xml):
    """
    Validates the given XML file descriptor; the program exits when it is
    not valid.

    The file is read and parsed with minidom, then its serialization is
    parsed again through a libxml2 parser context with validation switched
    on. Messages reported by libxml2 are collected by
    get_xmlvalidation_error() and shown with the fatal error.

    xml -- path of the XML file descriptor.

    Exits with status 1 when libxml2 cannot be imported or when the
    document is reported as not valid. Prints a confirmation when the
    document is valid and the verbose option is set.
    """

    try:
        import libxml2
    except ImportError:
        # sys.exc_info() avoids the version-specific "except X, e" syntax.
        message = "%s libxml2 library is NOT available: %s" % (FATAL_ERROR, sys.exc_info()[1])
        sys.stderr.write(message + "\n")
        sys.exit(1)

    nfile = open(xml, 'r')
    try:
        data = nfile.read()
    finally:
        nfile.close()

    dom = parseString(data)
    libxml2.registerErrorHandler(get_xmlvalidation_error, None)
    ctxt = libxml2.createDocParserCtxt(dom.toxml())
    ctxt.validate(1)
    ctxt.parseDocument()
    doc = ctxt.doc()
    valid = ctxt.isValid()
    doc.freeDoc()

    if valid != 1:
        if error_msg:
            message = "%s XML file descriptor is NOT valid: %s" % (FATAL_ERROR, error_msg)
        else:
            message = "%s XML file descriptor is NOT valid" % FATAL_ERROR
        sys.stderr.write(message + "\n")
        sys.exit(1)

    if options.verbose:
        sys.stdout.write("- XML file descriptor is valid.\n")
|
193 |
|
194 |
#-------------------------------------------------------------------------------
|
195 |
def get_xmlvalidation_error(ctx, strg):
    """
    Error callback registered with libxml2 by validate_xml().

    Appends the reported message `strg`, followed by a newline, to the
    module-level `error_msg`. `ctx` is the context value supplied at
    registration time and is not used.
    """

    global error_msg
    error_msg = error_msg + strg + "\n"
|
202 |
|
203 |
#-------------------------------------------------------------------------------
|
204 |
def get_options():
    """
    Parses the command line and returns the resulting options object.

    Recognised options are -v/--verbose and -x/--xml. The program exits
    with status 1, after writing a fatal error to stderr, when positional
    arguments are given, when no XML file name is given, or when the named
    file does not exist.
    """

    parser = OptionParser(description=DESCRIPTION, epilog=EPILOG)
    parser.add_option("-v", "--verbose", dest="verbose",
                      action="store_true", default=False,
                      help="Show verbose scan information.")
    parser.add_option("-x", "--xml", dest="xmlfile",
                      help="XML file that describes root files to download.")
    parsed, leftovers = parser.parse_args()

    # The first failing check, in this order, decides the message.
    problem = None
    if leftovers:
        problem = "%s Too many arguments."
    elif not parsed.xmlfile:
        problem = "%s XML filename required."
    elif not os.path.isfile(parsed.xmlfile):
        problem = "%s XML file does NOT exist."

    if problem is not None:
        sys.stderr.write(problem % FATAL_ERROR + "\n")
        sys.exit(1)
    return parsed
|
226 |
|
227 |
#-------------------------------------------------------------------------------
|
228 |
exelist = []
|
229 |
options = get_options()
|
230 |
validate_xml(options.xmlfile)
|
231 |
xmldoc = minidom.parse(options.xmlfile)
|
232 |
print "- Processing valid XML file descriptor: %s\n" % options.xmlfile
|
233 |
|
234 |
#Gets a list of all subdetectors in the XML file descriptor.
|
235 |
sublist = get_elements_by_tag_name(xmldoc, 'SubDetectors')
|
236 |
|
237 |
#Parses the subdetector list in order to find DQM GUI server sources.
|
238 |
#LOCAL sources are processed by DQMAssembleReferenceHistos as well as CASTOR sources,
|
239 |
#due to users asked to be able to submit private DQM reprocessing histograms using exclusively CASTOR files.
|
240 |
for subdetector in sublist:
|
241 |
name = get_attribute_by_name(subdetector, "name")
|
242 |
source = get_attribute_by_name(subdetector, "source")
|
243 |
#*Temporal hard-coded source change in order to handle with a CASTOR Pixel payload submitted just before the new CASTOR source rules.
|
244 |
if source == "rfio:///castor/cern.ch/cms/store/group-tape/tracker/dqm/Reference/DQM_V0001_R000194317__MinimumBias__Run2012B-PromptReco-v1__DQM.root":
|
245 |
tsource = "194317/MinimumBias/Run2012B-PromptReco-v1/DQM"
|
246 |
update_xml(options.xmlfile,xmldoc.toxml("UTF-8").replace(source, tsource))
|
247 |
source =tsource
|
248 |
#*
|
249 |
if(get_source_type(source)==DQM_SERVER):
|
250 |
reflist = get_elements_by_tag_name(subdetector, 'ReferenceEntity')
|
251 |
exelist.append((get_dqm_access_expression(source), get_dqm_access_filters(name, reflist)))
|
252 |
else:
|
253 |
print "ROOT file: %s \n was skipped since is NOT in the DQM GUI server, it will be processed by DQMAssembleReferenceHistos\n" %source
|
254 |
|
255 |
#Executes a dqm-access query for each DQM GUI server source found.
|
256 |
if exelist:
|
257 |
comlist = get_command_list(exelist)
|
258 |
for command in comlist:
|
259 |
(downcom, source) = command
|
260 |
nsource = get_local_source(source)
|
261 |
(downstatus, downoutput) = commands.getstatusoutput(downcom)
|
262 |
if options.verbose:
|
263 |
print "- Executing the command: %s\n" %downcom + "dqm-access output: \n%s\n" % downoutput
|
264 |
if(downstatus == 0 and not "Found 0 objects" in downoutput):
|
265 |
xmldoc = minidom.parse(options.xmlfile)
|
266 |
update_xml(options.xmlfile,xmldoc.toxml("UTF-8").replace(source, nsource))
|
267 |
print "ROOT file: %s was successfully produced" %nsource
|
268 |
elif (downstatus != 0 ):
|
269 |
print >> sys.stderr, "%s ROOT file %s \n was NOT produced since dqm-access is NOT working, \n\
|
270 |
--help option for more information\n" %(ERROR, nsource)
|
271 |
elif ("Found 0 objects" in downoutput ):
|
272 |
print >> sys.stderr, "%s ROOT file: %s \n was NOT found in the DQM GUI server\n" %(ERROR, nsource) |