ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/RateMonShiftTool_dev/AndrewWBMParser.py
Revision: 1.1
Committed: Wed Nov 30 18:02:13 2011 UTC (13 years, 5 months ago) by abrinke1
Content type: text/x-python
Branch: MAIN
Log Message:
Combination of Alex's and G&A's code

File Contents

# User Rev Content
1 abrinke1 1.1 from HTMLParser import HTMLParser
2     from urllib2 import urlopen
3     import cPickle as pickle
4     import os, sys
5     import time
6     import re
7    
### Need to overwrite some functions in the HTMLParser library.
### The module-level patterns below are the ones the overridden
### AndrewWBMParser.parse_starttag / check_for_whole_start_tag methods use.

# Matches a start tag from '<' up to (but not including) the closing '>' or
# '/>'.  Besides quoted and bare attribute values it accepts a value of the
# form  this.src='...'  (the line marked "hack").
# NOTE(review): the hack presumably exists for unquoted  onerror=this.src='x'
# style attributes on the scraped pages -- confirm against real page markup.
locatestarttagend = re.compile(r"""
  <[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
  (?:\s+ # whitespace before attribute name
    (?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
      (?:\s*=\s* # value indicator
        (?:'[^']*' # LITA-enclosed value
          |\"[^\"]*\" # LIT-enclosed value
          |this.src='[^']*' # hack
          |[^'\">\s]+ # bare value
         )
       )?
     )
   )*
  \s* # trailing whitespace
""", re.VERBOSE)

# Matches a tag name; parse_starttag applies it right after the opening '<'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# Matches one attribute: group 1 is the attribute name, group 2 the optional
# '= value' part, group 3 the value itself (still quoted if it was quoted).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
29    
class AndrewWBMParser(HTMLParser):
    """HTML table scraper for the run / trigger / lumi pages on the cmswbm host.

    Typical use: call ``_Parse(url)`` to download a page and fill
    ``self.table`` (one list of cell strings per ``<tr>``) and
    ``self.hyperlinks`` (first attribute value of every ``<a>`` tag), then
    call the ``Parse...`` method matching that page to interpret the table.
    Results are stored on the instance and also returned.

    The code targets Python 2 (``HTMLParser``, ``urllib2``, ``cPickle``);
    every ``print`` is written as a single-argument call so the output is
    the same as the original print statements.
    """

    def __init__(self):
        HTMLParser.__init__(self)

        ##-- Scratch state used while feeding a page --##
        self.InRow = 0        # 1 while inside a <tr>
        self.InEntry = 0      # 1 while inside a <td>
        self.table = []       # finished rows of the current page
        self.tmpRow = []      # cells of the row being read
        self.hyperlinks = []  # first attribute value of each <a> tag

        ##-- Defined in ParsePage1 --##
        self.RunNumber = 0
        self.RunPage = ''  # previously only created inside ParsePage1

        ##-- Defined in ParseRunPage --##
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page = ''
        self.PrescaleChangesPage = ''
        self.TriggerModePage = ''
        self.Date = ''
        self.HLT_Key = ''

        ##-- Defined in ParseHLTSummaryPage --##
        self.TriggerRates = {}

        ##-- Defined in ParseLumiPage --##
        self.LSByLS = []
        self.InstLumiByLS = {}
        self.DeliveredLumiByLS = {}
        self.LiveLumiByLS = {}
        self.PSColumnByLS = {}
        self.AvInstLumi = 0
        self.AvDeliveredLumi = 0
        self.AvLiveLumi = 0
        self.LumiInfo = []  ##Returns

        ##-- Defined in ParseL1Page (not currently used) --##
        self.L1Rates = {}  ##Returns

        ##-- Defined in ParsePSColumnPage (not currently used) --##
        self.PSColumnChanges = []  ##Returns

        ##-- Defined in ParseTriggerModePage --##
        self.L1TriggerMode = {}
        self.HLTTriggerMode = {}
        self.HLTSeed = {}
        self.TriggerInfo = []  ##Returns

        ##-- Defined in AssemblePrescaleValues --##
        self.L1Prescale = {}
        self.HLTPrescale = {}
        self.MissingPrescale = []
        self.PrescaleValues = []  ##Returns

        ##-- Defined in ComputeTotalPrescales --##
        self.TotalPSInfo = []  ##Returns

        ##-- Defined in CorrectForPrescaleChange --##
        self.CorrectedPSInfo = []  ##Returns

        ##-- In the current Parser.py philosophy, only RunNumber is set globally
        ## - LS range is set from the outside for each individual function
        #self.FirstLS = -1
        #self.LastLS = -1

    def parse_starttag(self, i):  ## Overwrite function from HTMLParser
        """Parse the start tag beginning at ``rawdata[i]``.

        Uses the module-level ``tagfind`` / ``attrfind`` patterns.  ``<img>``
        tags are skipped entirely: no handler is called for them.

        Returns the index just past the tag, or a negative value when the
        tag is not complete yet.
        """
        # The base class keeps the tag text in its own name-mangled attribute
        # (_HTMLParser__starttag_text).  Writing ``self.__starttag_text``
        # inside this subclass would create _AndrewWBMParser__starttag_text
        # instead, and get_starttag_text() would never see it.
        self._HTMLParser__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self._HTMLParser__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and endpos into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i + 1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i + 1:k].lower()

        if tag == 'img':
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif (attrvalue[:1] == '\'' == attrvalue[-1:]
                  or attrvalue[:1] == '"' == attrvalue[-1:]):
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Return the end index of the start tag at ``rawdata[i]``.

        Uses the module-level ``locatestarttagend`` pattern.  Returns -1
        when more input is needed; calls ``self.error`` on malformed tags.
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            following = rawdata[j:j + 1]  # character right after the match
            if following == ">":
                return j + 1
            if following == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                self.updatepos(i, j + 1)
                self.error("malformed empty start tag")
            if following == "":
                # end of input
                return -1
            if following in ("abcdefghijklmnopqrstuvwxyz=/"
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self, url):
        """Download ``url`` and feed it to the parser.

        ``self.table`` and ``self.hyperlinks`` are cleared first, so after
        the call they describe only this page.  Network errors propagate.
        """
        self.table = []
        self.hyperlinks = []
        req = urlopen(url)
        try:
            page = req.read()
        finally:
            req.close()  # the response was previously never closed
        self.feed(page)

    def handle_starttag(self, tag, attrs):
        """Record links and track entry into table rows / cells."""
        if tag == 'a' and attrs:
            # Only the FIRST attribute's value is kept (it may be None for a
            # valueless attribute).
            # NOTE(review): presumably href is always first on these pages.
            self.hyperlinks.append(attrs[0][1])

        if tag == 'tr':
            self.InRow = 1
        if tag == 'td':
            self.InEntry = 1

    def handle_endtag(self, tag):
        """Close the current cell, or flush the current row into the table."""
        if tag == 'tr':
            if self.InRow == 1:
                self.InRow = 0
                self.table.append(self.tmpRow)
                self.tmpRow = []
        if tag == 'td':
            self.InEntry = 0

    def handle_startendtag(self, tag, attrs):
        """Ignore self-closing tags."""
        pass

    def handle_data(self, data):
        """Append text found inside a ``<td>`` to the row being built."""
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):  ## Parse the Run list page to figure out what the most recent run was
        """Find the most recent run on the run-list page.

        Sets ``self.RunNumber`` from the first non-empty table row.

        Returns '' when that run's trigger mode does not contain
        'l1_hlt_collisions', otherwise the first hyperlink containing
        'RUN=<RunNumber>' (also stored in ``self.RunPage``), or None when no
        such link exists.  Raises IndexError on an empty table or when the
        chosen row has fewer than four cells.
        """
        # First non-empty row; falls back to row 0 when every row is empty
        # (empty rows do show up, not exactly sure why).
        MostRecent = next((row for row in self.table if row != []),
                          self.table[0])
        TriggerMode = MostRecent[3]
        self.RunNumber = MostRecent[0]  ## Set the run number

        ## Is the most recent run a collisions run?
        if 'l1_hlt_collisions' not in TriggerMode:
            return ''

        wanted = 'RUN=' + self.RunNumber
        for link in self.hyperlinks:
            # Links can be None (valueless first attribute); skip those
            # instead of crashing.
            if link and wanted in link:
                self.RunPage = link  ## Get the link to the run summary page and return
                return link
        return None

    def ParseRunPage(self):
        """Extract sub-page URLs, HLT key and date from a run summary page.

        Returns ``[RatePage, LumiPage, L1Page, PrescaleChangesPage,
        TriggerModePage]``.  The HLT key is read from table cell [8][0] and
        the date from cell [1][4].
        """
        servlet = "http://cmswbm/cmsdb/servlet/"
        for entry in self.hyperlinks:
            if not entry:  # None or empty: nothing to match
                continue
            entry = entry.replace('../../', 'http://cmswbm/')
            if 'HLTSummary' in entry:
                self.RatePage = entry
            if 'L1Summary' in entry:
                self.L1Page = entry
            if 'LumiSections' in entry:
                self.LumiPage = servlet + entry
            if 'PrescaleChanges' in entry:
                self.PrescaleChangesPage = servlet + entry
            if 'TriggerMode' in entry:
                self.TriggerModePage = entry

        self.HLT_Key = self.table[8][0]
        self.Date = self.table[1][4]

        return [self.RatePage, self.LumiPage, self.L1Page,
                self.PrescaleChangesPage, self.TriggerModePage]

    def ParseHLTSummaryPage(self, StartLS, EndLS):
        """Read per-trigger rates from an HLT summary page.

        For every row whose second cell starts with 'HLT_' this stores
        ``[TriggerRate, L1Pass, PSPass, PS, Seed, StartLS, EndLS]`` in
        ``self.TriggerRates`` under the trigger name cut just after '_v'.
        ``PS`` is L1Pass/PSPass, or L1Pass when PSPass is 0, or 1 when both
        are 0.  Returns ``self.TriggerRates``.
        """
        for line in self.table:
            # Column 9 (the seed) is read below, so at least 10 cells are
            # needed; the old "> 6" guard let shorter rows crash.
            if len(line) < 10:
                continue
            if not line[1].startswith('HLT_'):
                continue

            # Format is HLT_..._v# (####); keep everything up to '_v'
            TriggerName = line[1][:line[1].find('_v') + 2]
            # Need to remove the ","s, since float() can't parse them
            TriggerRate = float(line[6].replace(',', ''))
            L1Pass = int(line[3])
            PSPass = int(line[4])
            Seed = line[9]
            if PSPass > 0:
                PS = float(line[3]) / float(line[4])
            elif L1Pass > 0:
                # Previously the raw string line[3] was stored here, which
                # breaks the arithmetic in CorrectForPrescaleChange.
                PS = L1Pass
            else:
                PS = 1
            self.TriggerRates[TriggerName] = [TriggerRate, L1Pass, PSPass,
                                              PS, Seed, StartLS, EndLS]

        return self.TriggerRates

    def ParseLumiPage(self, StartLS, EndLS):
        """Read the per-lumi-section table and compute range averages.

        Rows with fewer than 10 or more than 13 cells are skipped, as are
        rows where column 8 is below 10 or column 9 is below 1 (beam
        absent).

        Special values: a negative ``StartLS`` means "that many sections
        before (last LS - 3)"; ``StartLS == 999999`` means the first parsed
        LS; ``EndLS == 111111`` means the last parsed LS.

        Returns ``[self.LumiInfo, StartLS, EndLS]`` with the resolved range.
        Raises KeyError when a range end was not parsed, and
        ZeroDivisionError when the range is empty or has zero length.
        """
        for line in self.table:
            # Columns up to index 9 are read below, so shorter rows cannot
            # be used (the old "< 2" guard let them crash).
            if len(line) < 10 or len(line) > 13:
                continue
            if float(line[8]) < 10 or float(line[9]) < 1:  ##Beam 1 or Beam 2 absent
                continue

            ls = int(line[0])  # LumiSection number is in position 0
            self.LSByLS.append(ls)
            self.PSColumnByLS[ls] = int(line[2])  # Prescale column is in position 2
            self.InstLumiByLS[ls] = round(float(line[4]), 2)  # Instantaneous luminosity (delivered?) is in position 4
            self.LiveLumiByLS[ls] = round(float(line[6]), 2)  # Live lumi is in position 6
            self.DeliveredLumiByLS[ls] = round(float(line[5]), 2)  # Delivered lumi is in position 5

        if StartLS < 0:
            EndLS = max(self.LSByLS) - 3
            StartLS = EndLS + StartLS
            if StartLS < 2:  # The parser does not parse the first LS
                StartLS = 2
        if StartLS == 999999:
            StartLS = min(self.LSByLS)
        if EndLS == 111111:
            EndLS = max(self.LSByLS)
        if EndLS <= StartLS:
            print("In ParseLumiPage, EndLS <= StartLS")

        print("In ParseLumiPage, StartLS = " + str(StartLS) + " and EndLS = " + str(EndLS))

        # NOTE(review): 23.3 is presumably the lumi-section length in
        # seconds -- confirm.
        self.AvLiveLumi = 1000 * (self.LiveLumiByLS[EndLS] - self.LiveLumiByLS[StartLS]) / (23.3 * (EndLS - StartLS))
        self.AvDeliveredLumi = 1000 * (self.DeliveredLumiByLS[EndLS] - self.DeliveredLumiByLS[StartLS]) / (23.3 * (EndLS - StartLS))

        # Average over a local list: accumulating directly into
        # self.AvInstLumi carried the previous call's average into the sum.
        in_range = [self.InstLumiByLS[ls] for ls in self.LSByLS
                    if StartLS <= ls <= EndLS]
        self.AvInstLumi = sum(in_range) / len(in_range)

        self.LumiInfo = [self.LSByLS, self.PSColumnByLS, self.InstLumiByLS,
                         self.DeliveredLumiByLS, self.LiveLumiByLS,
                         self.AvInstLumi, self.AvDeliveredLumi,
                         self.AvLiveLumi]

        return [self.LumiInfo, StartLS, EndLS]

    def ParseL1Page(self):  ##Not used for anything - get this information with ParseTriggerModePage
        """Read L1 rates (4th cell from the end) for every 'L1_' row.

        Returns ``self.L1Rates``.
        """
        for line in self.table:
            if len(line) < 10:
                continue
            if line[1].startswith('L1_'):
                # Strip thousands separators up front instead of catching
                # every exception from float() and retrying.
                self.L1Rates[line[1]] = float(line[-4].replace(",", ""))

        return self.L1Rates

    def ParsePSColumnPage(self):
        """Collect ``[first LS, column index]`` for each prescale column change.

        Returns ``self.PSColumnChanges``.
        """
        for line in self.table:
            if len(line) < 5 or line[0].startswith('Run'):
                continue
            # line[1] is the first LS of a new PS column, line[2] is the column index
            self.PSColumnChanges.append([int(line[1]), int(line[2])])
        return self.PSColumnChanges

    def ParseTriggerModePage(self):
        """Read the prescale sets of all L1 and HLT paths.

        Returns ``[L1TriggerMode, HLTTriggerMode, HLTSeed]``.  HLT names have
        a numeric version removed ('HLT_X_v3' becomes 'HLT_X_v'); a name
        without a numeric version is kept unchanged.
        """
        for line in self.table:
            if len(line) < 6 or line[0].startswith('n'):
                continue
            if len(line) > 11:
                print(line)

            name = line[1]
            if name.startswith('L1_'):
                self.L1TriggerMode[name] = [int(value) for value in line[2:]]

            if name.startswith('HLT_'):
                # Eliminates version number from the string name.  The
                # version is looked up afresh for every row; before, a name
                # without one reused the previous row's version or raised
                # NameError.
                version = None
                for piece in name.split("_v"):
                    if piece.isdigit():
                        version = piece
                if version is not None:
                    name = name.replace('_v' + version, '_v')

                # The parser counts the number in parentheses after the
                # trigger name as its own column, hence the start at 3.
                prescales = [int(value) for value in line[3:-1]]
                self.HLTTriggerMode[name] = prescales

                last = line[-1]
                if last.startswith('L1_'):
                    self.HLTSeed[name] = last
                elif " OR" not in last:
                    # NOTE(review): this re-appends the second-to-last
                    # column (the original used the leftover loop index,
                    # i.e. line[len(line)-2]).  It looks like line[-1] may
                    # have been intended; kept unchanged pending confirmation.
                    prescales.append(int(line[-2]))
                    self.HLTSeed[name] = "NULL"
                else:
                    self.HLTSeed[name] = str(last)

        self.TriggerInfo = [self.L1TriggerMode, self.HLTTriggerMode, self.HLTSeed]
        return self.TriggerInfo

    def AssemblePrescaleValues(self):  ##Depends on output from ParseLumiPage and ParseTriggerModePage
        """Build per-LS prescale values for every L1 and HLT path.

        For each path and each LS between the first and last parsed LS, the
        prescale is taken from the path's prescale set at that LS's column.
        Paths with at least one unavailable LS are listed in
        ``self.MissingPrescale``.

        Returns ``[L1Prescale, HLTPrescale, MissingPrescale]``.
        """
        if self.LSByLS:
            ls_numbers = range(min(self.LSByLS), max(self.LSByLS) + 1)
        else:
            ls_numbers = []

        MissingName = "Nemo"  # last key reported missing; avoids duplicates
        tables = ((self.L1TriggerMode, self.L1Prescale),
                  (self.HLTTriggerMode, self.HLTPrescale))
        for modes, prescales in tables:
            for key in modes:
                prescales[key] = {}
                for n in ls_numbers:
                    try:
                        prescales[key][n] = modes[key][self.PSColumnByLS[n]]
                    except (KeyError, IndexError):
                        # KeyError: LS absent from the lumi page;
                        # IndexError: column beyond this path's set.
                        if not key == MissingName:
                            self.MissingPrescale.append(key)
                            MissingName = key
                        if not n < 2:
                            print("LS " + str(n) + " of key " + str(key) + " is missing from the LumiSections page")

        self.PrescaleValues = [self.L1Prescale, self.HLTPrescale, self.MissingPrescale]
        return self.PrescaleValues

    def ComputeTotalPrescales(self, StartLS, EndLS):
        """Compute the expected prescales of every HLT path over an LS range.

        Returns ``[L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale, n1,
        n2, L1, L2, H1, H2]``, each a dict keyed by HLT path name.  Paths
        whose seed has no L1 prescale at ``StartLS`` get ``L1_zero`` set and
        nothing else; paths with a zero prescale somewhere in the range
        carry only the flags and partial sums.
        """
        IdealHLTPrescale = {}
        IdealPrescale = {}
        L1_zero = {}
        HLT_zero = {}
        n1 = {}
        n2 = {}
        L1 = {}
        L2 = {}
        H1 = {}
        H2 = {}
        InitialColumnIndex = self.PSColumnByLS[int(StartLS)]
        n_sections = EndLS + 1 - StartLS

        for key in self.HLTTriggerMode:
            # Does this path have a valid L1 seed with a prescale?
            seed_prescale = self.L1Prescale.get(self.HLTSeed.get(key))
            if seed_prescale is None or StartLS not in seed_prescale:
                L1_zero[key] = True
                HLT_zero[key] = False
                continue

            IdealHLTPrescale[key] = 0.0
            IdealPrescale[key] = 0.0
            n1[key] = 0
            L1_zero[key] = False
            HLT_zero[key] = False
            hlt_prescale = self.HLTPrescale[key]

            for ls in range(StartLS, EndLS + 1):  # "range" excludes the last element
                l1_value = seed_prescale[ls]
                hlt_value = hlt_prescale[ls]
                if l1_value > 0 and hlt_value > 0:
                    IdealPrescale[key] += 1.0 / (l1_value * hlt_value)
                else:
                    IdealPrescale[key] += 1.0  ##To prevent a divide by 0 error later
                if l1_value < 0.1:
                    L1_zero[key] = True
                if hlt_value < 0.1:
                    HLT_zero[key] = True
                if self.PSColumnByLS[ls] == InitialColumnIndex:
                    n1[key] += 1

            if L1_zero[key] or HLT_zero[key]:
                continue

            IdealPrescale[key] = n_sections / IdealPrescale[key]

            n2[key] = float(n_sections - n1[key])
            L1[key] = float(seed_prescale[StartLS])
            L2[key] = float(seed_prescale[EndLS])
            H1[key] = float(hlt_prescale[StartLS])
            H2[key] = float(hlt_prescale[EndLS])

            IdealHLTPrescale[key] = ((n1[key] / L1[key]) + (n2[key] / L2[key])) / ((n1[key] / (L1[key] * H1[key])) + (n2[key] / (L2[key] * H2[key])))

        self.TotalPSInfo = [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale,
                            n1, n2, L1, L2, H1, H2]

        return self.TotalPSInfo

    def CorrectForPrescaleChange(self, StartLS, EndLS):
        """Adjust prescales for a prescale change near the edge of the range.

        Needs ``self.TotalPSInfo`` (from ComputeTotalPrescales) and
        ``self.TriggerRates`` (from ParseHLTSummaryPage).  For every path
        the ``StartLS`` / ``EndLS`` stored in ``self.TriggerRates`` replace
        the arguments.  The L1/L2/H1/H2 dicts of ``self.TotalPSInfo`` are
        modified in place.

        Returns ``[RealPrescale, xLS, L1, L2, H1, H2]``.
        """
        [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale,
         n1, n2, L1, L2, H1, H2] = self.TotalPSInfo
        xLS = {}
        RealPrescale = {}

        for key in self.HLTTriggerMode:
            if L1_zero[key] or HLT_zero[key]:
                continue
            [TriggerRate, L1Pass, PSPass, PS, Seed, StartLS, EndLS] = self.TriggerRates[key]
            ideal_hlt = IdealHLTPrescale[key]
            # Measured prescale within 5% of the expected one: no correction
            if 0.95 * ideal_hlt < PS < 1.05 * ideal_hlt:
                RealPrescale[key] = IdealPrescale[key]
                continue

            seed = self.HLTSeed[key]
            if H1[key] == H2[key] and L1[key] == L2[key] and EndLS <= max(self.LSByLS) - 1:  ##Look for prescale change into the next LS
                H2[key] = float(self.HLTPrescale[key][EndLS + 1])
                L2[key] = float(self.L1Prescale[seed][EndLS + 1])
            if H1[key] == H2[key] and L1[key] == L2[key] and StartLS >= 3:
                H1[key] = float(self.HLTPrescale[key][StartLS - 1])
                L1[key] = float(self.L1Prescale[seed][StartLS - 1])

            if H1[key] == H2[key]:
                xLS[key] = 0
            else:
                # Same expression as before, with the repeated factors named.
                ratio = PS / ideal_hlt
                l1_mix = L2[key] * n1[key] + L1[key] * n2[key]
                total_mix = H2[key] * L2[key] * n1[key] + H1[key] * L1[key] * n2[key]
                numerator = (-ratio * l1_mix * total_mix) + (total_mix * l1_mix)
                denominator = ((ratio * l1_mix * (H1[key] * L1[key] - H2[key] * L2[key]))
                               + (total_mix * (L2[key] - L1[key])))
                xLS[key] = numerator / denominator

            # Limit the shift to one lumi section in either direction
            if xLS[key] > 1:
                xLS[key] = 1
            if xLS[key] < -1:
                xLS[key] = -1
            RealPrescale[key] = (n1[key] + n2[key]) / (((n1[key] - xLS[key]) / (H1[key] * L1[key])) + (n2[key] + xLS[key]) / (H2[key] * L2[key]))

        self.CorrectedPSInfo = [RealPrescale, xLS, L1, L2, H1, H2]

        return self.CorrectedPSInfo

    def Save(self, fileName):
        """Pickle this parser to ``fileName``, creating its directory if needed."""
        directory = os.path.dirname(fileName)
        # dirname is '' for a bare file name; os.makedirs('') would fail.
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        with open(fileName, 'wb') as handle:
            pickle.dump(self, handle)

    def Load(self, fileName):
        """Restore this parser's state from a file written by ``Save``.

        The loaded attributes are copied onto this instance (the old code
        only rebound the local name ``self``, so nothing was loaded).
        Returns ``self``.

        WARNING: unpickling can execute arbitrary code; only load files
        from a trusted source.
        """
        with open(fileName, 'rb') as handle:
            loaded = pickle.load(handle)
        self.__dict__.update(loaded.__dict__)
        return self