ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/RateMonShiftTool_dev/AndrewWBMParser.py
Revision: 1.3
Committed: Wed Oct 31 16:26:50 2012 UTC (12 years, 5 months ago) by awoodard
Content type: text/x-python
Branch: MAIN
CVS Tags: HEAD
Changes since 1.2: +0 -0 lines
State: FILE REMOVED
Log Message:
deprecated

File Contents

# Content
1 from HTMLParser import HTMLParser
2 from urllib2 import urlopen
3 import cPickle as pickle
4 import os, sys
5 import time
6 import re
7
### Module-level copies of the start-tag patterns, used by the parse_starttag /
### check_for_whole_start_tag overrides defined on the parser class below.

# Text from '<' up to (not including) the closing '>' or '/>' of a start tag.
# On top of quoted and bare attribute values it accepts an unquoted
# this.src='...' value (the alternative marked "hack").
_START_TAG_END = r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
  (?:\s+                             # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
      (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
          |\"[^\"]*\"                # LIT-enclosed value
          |this.src='[^']*'          # hack
          |[^'\">\s]+                # bare value
         )
       )?
     )
   )*
  \s*                                # trailing whitespace
"""
locatestarttagend = re.compile(_START_TAG_END, re.X)

# Tag name that follows the opening '<'.
tagfind = re.compile(r'[a-zA-Z][-.a-zA-Z0-9:_]*')

# One attribute: group 1 is the name, group 2 the optional "= value" part and
# group 3 the value itself, still wrapped in its quotes when it had any.
_ATTR_NAME = r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)'
_ATTR_VALUE = (r'(\s*=\s*'
               r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
attrfind = re.compile(_ATTR_NAME + _ATTR_VALUE)
29
class AndrewWBMParser(HTMLParser):
    """HTMLParser subclass that turns a fetched page into rows and links.

    While a page is fed, the text found inside ``<td>`` cells is gathered
    row by row into ``self.table`` and the targets of ``<a>`` tags into
    ``self.hyperlinks``.  The ``Parse*`` methods then interpret whatever
    page was loaded last by picking fixed columns out of ``self.table``;
    the remaining methods combine those results into prescale information.
    """

    def __init__(self):
        """Set up the HTML parser state and every result container."""
        HTMLParser.__init__(self)
        self.InRow = 0        # 1 while inside a <tr>
        self.InEntry = 0      # 1 while inside a <td>
        self.table = []       # finished rows, each a list of cell strings
        self.tmpRow = []      # cells of the row currently being read
        self.hyperlinks = []  # link targets collected from <a> tags

        ##-- Defined in ParsePage1 --##
        self.RunNumber = 0
        # Initialised here so that reading RunPage before ParsePage1 has
        # found a link does not raise AttributeError.
        self.RunPage = ''

        ##-- Defined in ParseRunPage --##
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page = ''
        self.PrescaleChangesPage = ''
        self.TriggerModePage = ''
        self.Date = ''
        self.HLT_Key = ''

        ##-- Defined in ParseHLTSummaryPage --##
        self.TriggerRates = {}

        ##-- Defined in ParseLumiPage --##
        self.LSByLS = []
        self.InstLumiByLS = {}
        self.DeliveredLumiByLS = {}
        self.LiveLumiByLS = {}
        self.PSColumnByLS = {}
        self.PrescaleColumnString = ''
        self.AvInstLumi = 0
        self.AvDeliveredLumi = 0
        self.AvLiveLumi = 0
        self.LumiInfo = []  ##Returns

        ##-- Defined in ParseL1Page (not currently used) --##
        self.L1Rates = {}  ##Returns

        ##-- Defined in ParsePSColumnPage (not currently used) --##
        self.PSColumnChanges = []  ##Returns

        ##-- Defined in ParseTriggerModePage --##
        self.L1TriggerMode = {}
        self.HLTTriggerMode = {}
        self.HLTSeed = {}
        self.TriggerInfo = []  ##Returns

        ##-- Defined in AssemblePrescaleValues --##
        self.L1Prescale = {}
        self.HLTPrescale = {}
        self.MissingPrescale = []
        self.PrescaleValues = []  ##Returns

        ##-- Defined in ComputeTotalPrescales --##
        self.TotalPSInfo = []  ##Returns

        ##-- Defined in CorrectForPrescaleChange --##
        self.CorrectedPSInfo = []  ##Returns

        ##-- In the current Parser.py philosophy, only RunNumber is set globally
        ## - LS range is set from the outside for each individual function

    def parse_starttag(self, i):  ## Overwrite function from HTMLParser
        """Parse the start tag at ``rawdata[i]`` with this module's patterns.

        Returns the index just past the tag, or a negative value when the
        tag is not complete in the buffer yet.  ``<img>`` tags are skipped
        without having their attributes parsed or a handler called.
        """
        # The base class keeps the tag text under its own mangled private
        # name.  Writing ``self.__starttag_text`` inside this class would be
        # stored as ``_AndrewWBMParser__starttag_text`` and would never be
        # seen by HTMLParser.get_starttag_text().
        self._HTMLParser__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self._HTMLParser__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and endpos into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i + 1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i + 1:k].lower()

        if tag == 'img':
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif (attrvalue[:1] == '\'' == attrvalue[-1:] or
                  attrvalue[:1] == '"' == attrvalue[-1:]):
                attrvalue = self.unescape(attrvalue[1:-1])
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                # NOTE(review): newer 2.7 releases appear to require the tag
                # as argument while older ones take none; try both so a
                # <script>/<style> tag does not abort the parse. Confirm
                # against the interpreter actually used.
                try:
                    self.set_cdata_mode(tag)
                except TypeError:
                    self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Return the end index of the start tag at ``i``, or -1 if incomplete.

        Malformed tags are reported through ``self.error``.
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            following = rawdata[j:j + 1]
            if following == ">":
                return j + 1
            if following == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                self.updatepos(i, j + 1)
                self.error("malformed empty start tag")
            if following == "":
                # end of input
                return -1
            if following in ("abcdefghijklmnopqrstuvwxyz=/"
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self, url):
        """Download ``url`` and feed it to the parser.

        ``self.table`` and ``self.hyperlinks`` are emptied first so they
        only describe the page fetched by this call.
        """
        self.table = []
        self.hyperlinks = []
        req = urlopen(url)
        try:
            page = req.read()
        finally:
            # Release the connection even when the read fails.
            req.close()
        self.feed(page)

    def handle_starttag(self, tag, attrs):
        """Record link targets and note when a row or cell is entered."""
        if tag == 'a' and attrs:
            # Prefer the href attribute; fall back to the first attribute
            # (the historical behaviour) when the tag has no href.
            target = dict(attrs).get('href', attrs[0][1])
            # Valueless attributes arrive as None and would break the string
            # operations performed on the collected links later on.
            if target is not None:
                self.hyperlinks.append(target)

        if tag == 'tr':
            self.InRow = 1
        if tag == 'td':
            self.InEntry = 1

    def handle_endtag(self, tag):
        """Store the finished row on ``</tr>`` and leave the cell on ``</td>``."""
        if tag == 'tr' and self.InRow == 1:
            self.InRow = 0
            self.table.append(self.tmpRow)
            self.tmpRow = []
        if tag == 'td':
            self.InEntry = 0

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags."""
        pass

    def handle_data(self, data):
        """Append text found inside a cell to the row being assembled."""
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):  ## Parse the Run list page to figure out what the most recent run was
        """Return the link of the first listed run, or '' if there is none.

        The first non-empty row of ``self.table`` supplies the run number
        (column 0) and the trigger mode (column 3).  '' is returned when the
        trigger mode does not contain 'l1_hlt_collisions', when no collected
        hyperlink contains ``RUN=<run number>``, or when the table cannot be
        read.  On success the link is also stored in ``self.RunPage``.
        """
        try:
            MostRecent = self.table[0]
            print(MostRecent)
            # skip empty rows, not exactly sure why they show up
            MostRecent = next((row for row in self.table if row != []),
                              MostRecent)
            TriggerMode = MostRecent[3]
            self.RunNumber = MostRecent[0]  ## Set the run number

            ## Is the most recent run a collisions run?
            if 'l1_hlt_collisions' not in TriggerMode:
                return ''
            wanted = 'RUN=' + self.RunNumber
            for link in self.hyperlinks:
                if wanted in link:
                    self.RunPage = link  ## Get the link to the run summary page and return
                    return link
        except Exception:
            # Best effort: an unreadable page is reported, not raised.
            # KeyboardInterrupt and SystemExit are no longer swallowed.
            print('No Recent run')
            return ''
        # No hyperlink matched: answer '' like the other "nothing found" paths.
        return ''

    def ParseRunPage(self):
        """Pick the sub-page links, HLT key and date out of a run page.

        Returns ``[RatePage, LumiPage, L1Page, PrescaleChangesPage,
        TriggerModePage]``.  The HLT key is read from row 8, column 0 and
        the date from row 1, column 4 of ``self.table``.
        """
        servlet = "http://cmswbm/cmsdb/servlet/"
        for entry in self.hyperlinks:
            entry = entry.replace('../../', 'http://cmswbm/')
            if 'HLTSummary' in entry:
                self.RatePage = entry
            if 'L1Summary' in entry:
                self.L1Page = entry
            if 'LumiSections' in entry:
                self.LumiPage = servlet + entry
            if 'PrescaleChanges' in entry:
                self.PrescaleChangesPage = servlet + entry
            if 'TriggerMode' in entry:
                self.TriggerModePage = entry
        self.HLT_Key = self.table[8][0]
        self.Date = self.table[1][4]

        return [self.RatePage, self.LumiPage, self.L1Page,
                self.PrescaleChangesPage, self.TriggerModePage]

    def ParseHLTSummaryPage(self, StartLS, EndLS):
        """Fill and return ``self.TriggerRates`` from an HLT summary table.

        Each entry maps the trigger name, cut right after its first '_v',
        to ``[rate, L1Pass, PSPass, PS, seed, StartLS, EndLS]``.
        ``StartLS`` and ``EndLS`` are stored as given.
        """
        for line in self.table:
            # Column 9 (the seed) is read below, so shorter rows cannot be
            # trigger rows and are skipped instead of raising IndexError.
            if len(line) < 10:
                continue
            if not line[1].startswith('HLT_'):
                continue
            TriggerName = line[1][:line[1].find('_v') + 2]  # Format is HLT_... (####), this gets rid of the (####)
            TriggerRate = float(line[6].replace(',', ''))  # float() can't parse the ","s
            L1Pass = int(line[3])
            PSPass = int(line[4])
            Seed = line[9]
            if PSPass > 0:
                PS = float(line[3]) / float(line[4])
            elif L1Pass > 0:
                # Stored as a number: the raw cell string used to be kept
                # here, which breaks the later prescale arithmetic.
                PS = float(L1Pass)
            else:
                PS = 1
            self.TriggerRates[TriggerName] = [TriggerRate, L1Pass, PSPass, PS,
                                              Seed, StartLS, EndLS]

        return self.TriggerRates

    def ParseLumiPage(self, StartLS, EndLS):
        """Read the per-lumisection table and average it over an LS range.

        Returns ``[self.LumiInfo, StartLS, EndLS]`` with the range that was
        actually used.  A negative ``StartLS`` is taken relative to the
        third-from-last lumisection; 999999 and 111111 stand for the first
        and last lumisection that was read.
        """
        for line in self.table:
            # Columns up to index 9 are read below; rows outside 10..13
            # cells are skipped.
            if len(line) < 10 or len(line) > 13:
                continue
            if float(line[8]) < 10 or float(line[9]) < 1:  ##Beam 1 or Beam 2 absent
                continue

            ls = int(line[0])  # LumiSection number is in position 0
            # Parsing the same page twice must not list a lumisection twice,
            # otherwise it is double counted in the average below.
            if ls not in self.PSColumnByLS:
                self.LSByLS.append(ls)
            self.PSColumnByLS[ls] = int(line[2])  # Prescale column is in position 2
            self.InstLumiByLS[ls] = round(float(line[4]), 2)  # Instantaneous luminosity (delivered?) is in position 4
            self.LiveLumiByLS[ls] = round(float(line[6]), 2)  # Live lumi is in position 6
            self.DeliveredLumiByLS[ls] = round(float(line[5]), 2)  # Delivered lumi is in position 5

        if StartLS < 0:
            EndLS = max(self.LSByLS) - 3
            StartLS = EndLS + StartLS
        if StartLS < 2:  # The parser does not parse the first LS
            StartLS = 2
        if StartLS == 999999:
            StartLS = min(self.LSByLS)
        if EndLS == 111111:
            EndLS = max(self.LSByLS)
        if EndLS <= StartLS:
            print("In ParseLumiPage, EndLS <= StartLS")

        span = 23.3 * (EndLS - StartLS)
        self.AvLiveLumi = 1000 * (self.LiveLumiByLS[EndLS] - self.LiveLumiByLS[StartLS]) / span
        self.AvDeliveredLumi = 1000 * (self.DeliveredLumiByLS[EndLS] - self.DeliveredLumiByLS[StartLS]) / span

        # Averaged from scratch on every call; adding onto the value left by
        # a previous call would inflate the result.
        inRange = [self.InstLumiByLS[ls] for ls in self.LSByLS
                   if StartLS <= ls <= EndLS]
        self.AvInstLumi = sum(inRange) / len(inRange)

        self.LumiInfo = [self.LSByLS, self.PSColumnByLS, self.InstLumiByLS,
                         self.DeliveredLumiByLS, self.LiveLumiByLS,
                         self.AvInstLumi, self.AvDeliveredLumi, self.AvLiveLumi]

        return [self.LumiInfo, StartLS, EndLS]

    def ParseL1Page(self):  ##Not used for anything - get this information with ParseTriggerModePage
        """Fill and return ``self.L1Rates``: L1 name -> fourth-from-last column."""
        for line in self.table:
            if len(line) < 10:
                continue
            if line[1].startswith('L1_'):
                # Thousands separators are dropped before converting.
                self.L1Rates[line[1]] = float(line[-4].replace(",", ""))

        return self.L1Rates

    def ParsePSColumnPage(self):
        """Append ``[first LS, column index]`` pairs to ``self.PSColumnChanges``."""
        for line in self.table:
            if len(line) < 5 or line[0].startswith('Run'):
                continue
            # line[1] is the first LS of a new PS column, line[2] is the column index
            self.PSColumnChanges.append([int(line[1]), int(line[2])])
        return self.PSColumnChanges

    def ParseTriggerModePage(self):
        """Read the prescale columns of every L1 and HLT path.

        Returns ``[L1TriggerMode, HLTTriggerMode, HLTSeed]``.  HLT names lose
        the number after '_v'; a path whose last cell is neither an ``L1_``
        name nor an ``OR`` expression gets the seed "NULL".
        """
        for line in self.table:
            if len(line) < 6 or line[0].startswith('n'):
                continue
            if len(line) > 11:
                print(line)
            name = line[1]
            if name.startswith('L1_'):
                self.L1TriggerMode[name] = [int(cell) for cell in line[2:]]

            if name.startswith('HLT_'):
                HLTStringName = name
                for piece in name.split("_v"):  # Eliminates version number from the string name
                    if piece.isdigit():
                        HLTStringName = HLTStringName.replace('_v' + piece, '_v')

                # The parser counts the number in parentheses after the
                # trigger name as its own column, hence the start at 3; the
                # last cell is examined separately below.
                columns = [int(cell) for cell in line[3:-1]]
                self.HLTTriggerMode[HLTStringName] = columns

                last = line[-1]
                if last.startswith('L1_') or " OR" in last:
                    self.HLTSeed[HLTStringName] = str(last)
                else:
                    # Without a seed cell the last cell is a prescale too.
                    # It used to be skipped and the one before it appended a
                    # second time through the leftover loop index.
                    try:
                        columns.append(int(last))
                    except ValueError:
                        pass  # some other text: not a prescale value
                    self.HLTSeed[HLTStringName] = "NULL"

        self.TriggerInfo = [self.L1TriggerMode, self.HLTTriggerMode, self.HLTSeed]
        return self.TriggerInfo

    def _FillPrescales(self, triggerMode, prescale):
        """Fill ``prescale[key][LS]`` from the columns listed in ``triggerMode``."""
        if not triggerMode:
            return
        firstLS = min(self.LSByLS)
        lastLS = max(self.LSByLS)
        for key in triggerMode:
            prescale[key] = {}
            reported = False
            for n in range(firstLS, lastLS + 1):  # "range" excludes the last element
                try:
                    prescale[key][n] = triggerMode[key][self.PSColumnByLS[n]]
                except (KeyError, IndexError):
                    # LS absent from PSColumnByLS, or its column index is not
                    # available for this path.  Each path is listed once.
                    if not reported:
                        self.MissingPrescale.append(key)
                        reported = True
                    if not n < 2:
                        print("LS " + str(n) + " of key " + str(key) + " is missing from the LumiSections page")

    def AssemblePrescaleValues(self):  ##Depends on output from ParseLumiPage and ParseTriggerModePage
        """Build per-lumisection prescales for every L1 and HLT path.

        Returns ``[L1Prescale, HLTPrescale, MissingPrescale]``.
        """
        self._FillPrescales(self.L1TriggerMode, self.L1Prescale)
        self._FillPrescales(self.HLTTriggerMode, self.HLTPrescale)

        self.PrescaleValues = [self.L1Prescale, self.HLTPrescale, self.MissingPrescale]
        return self.PrescaleValues

    def ComputeTotalPrescales(self, StartLS, EndLS):
        """Average the L1*HLT prescale of every HLT path over ``StartLS..EndLS``.

        Returns ``[L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale, n1, n2,
        L1, L2, H1, H2]``, all dictionaries keyed by HLT path.  Paths whose
        seed has no L1 prescale at ``StartLS`` are only flagged in
        ``L1_zero``.
        """
        IdealHLTPrescale = {}
        IdealPrescale = {}
        L1_zero = {}
        HLT_zero = {}
        n1 = {}
        n2 = {}
        L1 = {}
        L2 = {}
        H1 = {}
        H2 = {}
        InitialColumnIndex = self.PSColumnByLS[int(StartLS)]
        nLS = EndLS + 1 - StartLS

        for key in self.HLTTriggerMode:
            seedPrescale = self.L1Prescale.get(self.HLTSeed.get(key))
            if seedPrescale is None or StartLS not in seedPrescale:
                # No usable L1 seed prescale for this path.
                L1_zero[key] = True
                HLT_zero[key] = False
                continue

            IdealHLTPrescale[key] = 0.0
            IdealPrescale[key] = 0.0
            n1[key] = 0
            L1_zero[key] = False
            HLT_zero[key] = False

            for LSIterator in range(StartLS, EndLS + 1):  # "range" excludes the last element
                l1 = seedPrescale[LSIterator]
                hlt = self.HLTPrescale[key][LSIterator]
                if l1 > 0 and hlt > 0:
                    IdealPrescale[key] += 1.0 / (l1 * hlt)
                else:
                    IdealPrescale[key] += 1.0  ##To prevent a divide by 0 error later
                if l1 < 0.1:
                    L1_zero[key] = True
                if hlt < 0.1:
                    HLT_zero[key] = True
                if self.PSColumnByLS[LSIterator] == InitialColumnIndex:
                    n1[key] += 1

            if L1_zero[key] or HLT_zero[key]:
                continue

            IdealPrescale[key] = nLS / IdealPrescale[key]

            n2[key] = float(nLS - n1[key])
            L1[key] = float(seedPrescale[StartLS])
            L2[key] = float(seedPrescale[EndLS])
            H1[key] = float(self.HLTPrescale[key][StartLS])
            H2[key] = float(self.HLTPrescale[key][EndLS])

            IdealHLTPrescale[key] = ((n1[key] / L1[key]) + (n2[key] / L2[key])) / ((n1[key] / (L1[key] * H1[key])) + (n2[key] / (L2[key] * H2[key])))

        self.TotalPSInfo = [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale,
                            n1, n2, L1, L2, H1, H2]

        return self.TotalPSInfo

    def CorrectForPrescaleChange(self, StartLS, EndLS):
        """Adjust the ideal prescales using the measured ones in TriggerRates.

        Works on the dictionaries stored in ``self.TotalPSInfo`` and may
        modify its L1/L2/H1/H2 entries in place.  Returns
        ``[RealPrescale, xLS, L1, L2, H1, H2]``.
        """
        [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale,
         n1, n2, L1, L2, H1, H2] = self.TotalPSInfo
        xLS = {}
        RealPrescale = {}

        for key in self.HLTTriggerMode:
            if L1_zero[key] or HLT_zero[key]:
                continue
            # NOTE(review): the LS range stored with the rates replaces the
            # StartLS/EndLS arguments from here on - confirm this is intended.
            [TriggerRate, L1Pass, PSPass, PS, Seed, StartLS, EndLS] = self.TriggerRates[key]
            ideal = IdealHLTPrescale[key]
            if PS > 0.95 * ideal and PS < 1.05 * ideal:
                RealPrescale[key] = IdealPrescale[key]
                continue

            if H1[key] == H2[key] and L1[key] == L2[key] and not EndLS > max(self.LSByLS) - 1:  ##Look for prescale change into the next LS
                H2[key] = float(self.HLTPrescale[key][EndLS + 1])
                L2[key] = float(self.L1Prescale[self.HLTSeed[key]][EndLS + 1])
            if H1[key] == H2[key] and L1[key] == L2[key] and not StartLS < 3:
                H1[key] = float(self.HLTPrescale[key][StartLS - 1])
                L1[key] = float(self.L1Prescale[self.HLTSeed[key]][StartLS - 1])

            if H1[key] == H2[key]:
                xLS[key] = 0
            else:
                ratio = PS / ideal
                a = L2[key] * n1[key] + L1[key] * n2[key]
                b = H2[key] * L2[key] * n1[key] + H1[key] * L1[key] * n2[key]
                xLS[key] = ((-ratio * a * b) + (b * a)) / ((ratio * a * (H1[key] * L1[key] - H2[key] * L2[key])) + (b * (L2[key] - L1[key])))

            # The shift is limited to one lumisection either way.
            if xLS[key] > 1:
                xLS[key] = 1
            if xLS[key] < -1:
                xLS[key] = -1
            RealPrescale[key] = (n1[key] + n2[key]) / (((n1[key] - xLS[key]) / (H1[key] * L1[key])) + (n2[key] + xLS[key]) / (H2[key] * L2[key]))

        self.CorrectedPSInfo = [RealPrescale, xLS, L1, L2, H1, H2]

        return self.CorrectedPSInfo

    def Save(self, fileName):
        """Pickle this parser to ``fileName``, creating its directory if needed."""
        directory = os.path.dirname(fileName)
        # A bare file name has no directory part; os.makedirs('') would raise.
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        with open(fileName, 'wb') as handle:
            pickle.dump(self, handle)

    def Load(self, fileName):
        """Replace this parser's state with the one pickled in ``fileName``.

        Only load files written by ``Save``: unpickling data from an
        untrusted source can execute arbitrary code.
        """
        with open(fileName, 'rb') as handle:
            loaded = pickle.load(handle)
        # Rebinding ``self`` would only change a local name, so the loaded
        # attributes are copied onto this instance instead.
        self.__dict__.update(loaded.__dict__)