ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/RateMonShiftTool_dev/AndrewWBMParser.py
Revision: 1.2
Committed: Fri Dec 16 19:43:51 2011 UTC (13 years, 4 months ago) by grchrist
Content type: text/x-python
Branch: MAIN
CVS Tags: V00-02-04, V00-02-03, V00-02-01, V00-01-10, V-00-01-10, V00-01-09, V00-01-08, V00-01-07, V00-01-06, V00-01-05, V00-01-04, V00-01-03, V00-01-02, V00-01-01, V00-00-34, V00-00-33, MenuAnalyzer_V00-00-02, MenuAnalyzer_V00-00-01, MenuAnalyzer_V1, V00-00-32, V00-00-31, V00-00-30, V00-00-29, V00-00-28, V00-00-27, V00-00-26, V00-00-24, V00-00-23, V00-00-22, V00-00-21, V00-00-20, V00-00-19, V00-00-18, V00-00-17, V00-00-16, V00-00-15, V00-00-14, V00-00-13, V00-00-12, V00-00-11, V00-00-10, V00-00-09, V00-00-08, V00-00-07, V00-00-05, V00-00-04, V00-00-03, V00-00-02, V00-00-01
Branch point for: V00-00-06
Changes since 1.1: +27 -17 lines
Log Message:
More integration: added extra table info. Fixed bug in ref runs.


File Contents

# User Rev Content
1 abrinke1 1.1 from HTMLParser import HTMLParser
2     from urllib2 import urlopen
3     import cPickle as pickle
4     import os, sys
5     import time
6     import re
7    
### need to overwrite some functions in the HTMLParser library

# Verbose-mode pattern matching a complete start tag up to (but not including)
# its closing '>' or '/>'.  check_for_whole_start_tag() uses the end of the
# match to decide whether the tag is complete.  Besides single-quoted,
# double-quoted and bare attribute values it also accepts a value of the form
# this.src='...' (the alternative marked "hack" below).
locatestarttagend = re.compile(r"""
<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
(?:\s+ # whitespace before attribute name
(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
(?:\s*=\s* # value indicator
(?:'[^']*' # LITA-enclosed value
|\"[^\"]*\" # LIT-enclosed value
|this.src='[^']*' # hack
|[^'\">\s]+ # bare value
)
)?
)
)*
\s* # trailing whitespace
""", re.VERBOSE)

# Matches a tag name; parse_starttag() applies it just after the opening '<'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# Matches one attribute: group 1 is the attribute name, group 2 the optional
# "= value" part and group 3 the raw (possibly still quoted) value.
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
29    
class AndrewWBMParser(HTMLParser):
    """HTML table/link scraper plus helpers that interpret the scraped rows.

    _Parse(url) downloads a page and fills self.table (one list of <td> text
    fragments per <tr>) and self.hyperlinks (one entry per <a> tag).  Each
    Parse*() method then interprets whatever table is currently loaded, so the
    expected call pattern is _Parse(url) followed by the matching Parse*()
    call.  All column positions used by the Parse*() methods are hard-coded.
    Results are kept on the instance and accumulate over its lifetime.
    """

    def __init__(self):
        """Initialise the HTML parser state and every result container."""
        HTMLParser.__init__(self)
        self.InRow = 0       # 1 while inside a <tr>
        self.InEntry = 0     # 1 while inside a <td>
        self.table = []      # rows collected from the current page
        self.tmpRow = []     # cells of the row currently being read
        self.hyperlinks = []

        ##-- Defined in ParsePage1 --##
        self.RunNumber = 0
        self.RunPage = ''

        ##-- Defined in ParseRunPage --##
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page = ''
        self.PrescaleChangesPage = ''
        self.TriggerModePage = ''
        self.Date = ''
        self.HLT_Key = ''

        ##-- Defined in ParseHLTSummaryPage --##
        self.TriggerRates = {}

        ##-- Defined in ParseLumiPage --##
        self.LSByLS = []
        self.InstLumiByLS = {}
        self.DeliveredLumiByLS = {}
        self.LiveLumiByLS = {}
        self.PSColumnByLS = {}
        self.PrescaleColumnString = ''
        self.AvInstLumi = 0
        self.AvDeliveredLumi = 0
        self.AvLiveLumi = 0
        self.LumiInfo = []  ##Returns

        ##-- Defined in ParseL1Page (not currently used) --##
        self.L1Rates = {}  ##Returns

        ##-- Defined in ParsePSColumnPage (not currently used) --##
        self.PSColumnChanges = []  ##Returns

        ##-- Defined in ParseTriggerModePage --##
        self.L1TriggerMode = {}
        self.HLTTriggerMode = {}
        self.HLTSeed = {}
        self.TriggerInfo = []  ##Returns

        ##-- Defined in AssemblePrescaleValues --##
        self.L1Prescale = {}
        self.HLTPrescale = {}
        self.MissingPrescale = []
        self.PrescaleValues = []  ##Returns

        ##-- Defined in ComputeTotalPrescales --##
        self.TotalPSInfo = []  ##Returns

        ##-- Defined in CorrectForPrescaleChange --##
        self.CorrectedPSInfo = []  ##Returns

        ##-- In the current Parser.py philosophy, only RunNumber is set globally
        ## - LS range is set from the outside for each individual function

    def parse_starttag(self, i):  ## Overwrite function from HTMLParser
        """Parse the start tag beginning at rawdata[i] and dispatch it.

        Differs from the library version in two ways: it relies on the
        module-level tagfind/attrfind/locatestarttagend patterns, and <img>
        tags are skipped without parsing their attributes.

        Returns the index just past the tag, or a negative value when the
        tag is not complete yet.
        """
        # Written with the explicit mangled name: a plain self.__starttag_text
        # inside this subclass would be mangled to _AndrewWBMParser__..., which
        # the base class's get_starttag_text() never reads.
        self._HTMLParser__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self._HTMLParser__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and endpos into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i + 1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i + 1:k].lower()

        if tag == 'img':
            # Image tags are ignored entirely (no handler is called).
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                    attrvalue[:1] == '"' == attrvalue[-1:]:
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                # NOTE(review): newer HTMLParser releases appear to take the
                # element name here while older ones take no argument; try the
                # newer form first and fall back to the older one.
                try:
                    self.set_cdata_mode(tag)
                except TypeError:
                    self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Return the index just past the start tag at rawdata[i].

        Returns -1 when the buffered data ends before the tag is complete.
        Malformed tags are reported through self.error().
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            lookahead = rawdata[j:j + 1]
            if lookahead == ">":
                return j + 1
            if lookahead == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                # A '/' not (yet) followed by '>': treat as a buffer boundary.
                return -1
            if lookahead == "":
                # end of input
                return -1
            if lookahead in ("abcdefghijklmnopqrstuvwxyz=/"
                             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self, url):
        """Download url and rebuild self.table / self.hyperlinks from it.

        Errors from urlopen() or from the parser propagate to the caller.
        """
        self.table = []
        self.hyperlinks = []
        # Start from a clean parser so that unprocessed input or a half-read
        # row left over from a previous page cannot leak into this one.
        self.reset()
        self.tmpRow = []
        self.InRow = 0
        self.InEntry = 0
        req = urlopen(url)
        try:
            page = req.read()
        finally:
            req.close()
        self.feed(page)

    def handle_starttag(self, tag, attrs):
        """Record link targets and track whether we are inside a row / cell."""
        if tag == 'a' and attrs:
            # Prefer the href attribute; fall back to the first attribute's
            # value when there is no href.  Valueless attributes (None) are
            # skipped because later code calls string methods on every link.
            link = dict(attrs).get('href', attrs[0][1])
            if link is not None:
                self.hyperlinks.append(link)

        if tag == 'tr':
            self.InRow = 1
        if tag == 'td':
            self.InEntry = 1

    def handle_endtag(self, tag):
        """Close the current cell or row; a finished row is added to self.table."""
        if tag == 'tr':
            if self.InRow == 1:
                self.InRow = 0
                self.table.append(self.tmpRow)
                self.tmpRow = []
        if tag == 'td':
            self.InEntry = 0

    def handle_startendtag(self, tag, attrs):
        """Ignore XHTML-style empty tags such as <br />."""
        pass

    def handle_data(self, data):
        """Append text found inside a <td> to the row being built."""
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):  ## Parse the Run list page to figure out what the most recent run was
        """Return the link to the most recent run's page, or '' if unavailable.

        Uses the first non-empty row of self.table: column 0 is stored as
        self.RunNumber and column 3 is taken as the trigger mode.  '' is
        returned when that mode does not contain 'l1_hlt_collisions', when no
        hyperlink contains 'RUN=<run number>', or when the table cannot be
        read (in which case 'No Recent run' is printed).
        """
        try:
            # Find the first non-empty row on page one
            MostRecent = self.table[0]
            print(MostRecent)
            for line in self.table:
                if line == []:
                    continue  # skip empty rows, not exactly sure why they show up
                MostRecent = line
                break  # find first non-empty line
            TriggerMode = MostRecent[3]
            self.RunNumber = MostRecent[0]  ## Set the run number

            ## Is the most recent run a collisions run?
            if 'l1_hlt_collisions' not in TriggerMode:
                return ''
            for link in self.hyperlinks:
                if 'RUN=' + self.RunNumber in link:
                    self.RunPage = link  ## Get the link to the run summary page and return
                    return link
        except Exception:
            # Best effort: an unreadable page just means "no run found".
            print('No Recent run')
        return ''

    def ParseRunPage(self):
        """Collect the sub-page links, HLT key and date from a run page.

        Returns [RatePage, LumiPage, L1Page, PrescaleChangesPage,
        TriggerModePage].  The HLT key and date are read from fixed table
        positions, so an IndexError is raised if the table is shorter than
        expected.
        """
        for entry in self.hyperlinks:
            entry = entry.replace('../../', 'http://cmswbm/')
            if 'HLTSummary' in entry:
                self.RatePage = entry
            if 'L1Summary' in entry:
                self.L1Page = entry
            if 'LumiSections' in entry:
                self.LumiPage = "http://cmswbm/cmsdb/servlet/" + entry
            if 'PrescaleChanges' in entry:
                self.PrescaleChangesPage = "http://cmswbm/cmsdb/servlet/" + entry
            if 'TriggerMode' in entry:
                self.TriggerModePage = entry

        self.HLT_Key = self.table[8][0]
        self.Date = self.table[1][4]

        return [self.RatePage, self.LumiPage, self.L1Page,
                self.PrescaleChangesPage, self.TriggerModePage]

    def ParseHLTSummaryPage(self, StartLS, EndLS):
        """Fill and return self.TriggerRates from the HLT summary table.

        Each entry maps the trigger name, cut just after its last '_v' so the
        version number is dropped, to
        [rate, L1Pass, PSPass, PS, seed, StartLS, EndLS].  PS is L1Pass/PSPass,
        or L1Pass itself when PSPass is 0, or 1 when both are 0.  StartLS and
        EndLS are only stored, not used for any selection here.
        """
        for line in self.table:
            if len(line) < 10:  # columns up to line[9] are read below
                continue
            name = line[1]
            if not name.startswith('HLT_'):
                continue

            # Cut at the LAST '_v' so the key matches the names produced by
            # ParseTriggerModePage; keep the whole name if there is no '_v'.
            cut = name.rfind('_v')
            if cut == -1:
                TriggerName = name
            else:
                TriggerName = name[:cut + 2]

            # Thousands separators are removed because float()/int() reject them.
            TriggerRate = float(line[6].replace(',', ''))
            L1Pass = int(line[3].replace(',', ''))
            PSPass = int(line[4].replace(',', ''))
            Seed = line[9]
            if PSPass > 0:
                PS = float(L1Pass) / float(PSPass)
            elif L1Pass > 0:
                PS = float(L1Pass)  # kept numeric: PS is used in arithmetic later
            else:
                PS = 1
            self.TriggerRates[TriggerName] = [TriggerRate, L1Pass, PSPass, PS,
                                              Seed, StartLS, EndLS]

        return self.TriggerRates

    def ParseLumiPage(self, StartLS, EndLS):
        """Read per-lumi-section values and average them over [StartLS, EndLS].

        Rows where column 8 is below 10 or column 9 is below 1 are skipped.
        A negative StartLS selects a window ending 3 LS before the last one
        read; StartLS == 999999 means "first LS read" and EndLS == 111111
        means "last LS read".  StartLS is never allowed below 2.

        Returns [self.LumiInfo, StartLS, EndLS] with the resolved range.
        Raises KeyError if a resolved boundary LS was not read, and ValueError
        if no LS was read but one of the relative/sentinel ranges is requested.
        """
        for line in self.table:
            if len(line) < 10 or len(line) > 13:  # columns up to line[9] are read
                continue
            if float(line[8]) < 10 or float(line[9]) < 1:  ##Beam 1 or Beam 2 absent
                continue

            ls = int(line[0])  # LumiSection number is in position 0
            if ls not in self.PSColumnByLS:
                # Avoid duplicates when the page is parsed more than once.
                self.LSByLS.append(ls)
            self.PSColumnByLS[ls] = int(line[2])  # Prescale column is in position 2
            self.InstLumiByLS[ls] = round(float(line[4]), 2)  # Instantaneous luminosity (delivered?) is in position 4
            self.LiveLumiByLS[ls] = round(float(line[6]), 2)  # Live lumi is in position 6
            self.DeliveredLumiByLS[ls] = round(float(line[5]), 2)  # Delivered lumi is in position 5

        if StartLS < 0:
            EndLS = max(self.LSByLS) - 3
            StartLS = EndLS + StartLS
        if StartLS < 2:  # The parser does not parse the first LS
            StartLS = 2
        if StartLS == 999999:
            StartLS = min(self.LSByLS)
        if EndLS == 111111:
            EndLS = max(self.LSByLS)
        if EndLS <= StartLS:
            print("In ParseLumiPage, EndLS <= StartLS")

        span = EndLS - StartLS
        if span == 0:
            # A single-LS window has no lumi difference to average.
            self.AvLiveLumi = 0.0
            self.AvDeliveredLumi = 0.0
        else:
            # 23.3 is presumably the length of one lumi section in seconds -- TODO confirm
            self.AvLiveLumi = 1000 * (self.LiveLumiByLS[EndLS] - self.LiveLumiByLS[StartLS]) / (23.3 * span)
            self.AvDeliveredLumi = 1000 * (self.DeliveredLumiByLS[EndLS] - self.DeliveredLumiByLS[StartLS]) / (23.3 * span)

        # Recomputed from scratch on every call rather than accumulated.
        inRange = [self.InstLumiByLS[ls] for ls in self.LSByLS
                   if ls >= StartLS and ls <= EndLS]
        if inRange:
            self.AvInstLumi = sum(inRange) / len(inRange)
        else:
            self.AvInstLumi = 0.0

        self.LumiInfo = [self.LSByLS, self.PSColumnByLS, self.InstLumiByLS,
                         self.DeliveredLumiByLS, self.LiveLumiByLS,
                         self.AvInstLumi, self.AvDeliveredLumi, self.AvLiveLumi]

        return [self.LumiInfo, StartLS, EndLS]

    def ParseL1Page(self):  ##Not used for anything - get this information with ParseTriggerModePage
        """Fill and return self.L1Rates: L1 name -> value of the 4th column from the end."""
        for line in self.table:
            if len(line) < 10:
                continue
            if line[1].startswith('L1_'):
                # Removing ',' up front covers values with and without separators.
                self.L1Rates[line[1]] = float(line[len(line) - 4].replace(",", ""))

        return self.L1Rates

    def ParsePSColumnPage(self):
        """Append [first LS, column index] pairs to self.PSColumnChanges and return it."""
        for line in self.table:
            if len(line) < 5 or line[0].startswith('Run'):
                continue
            # line[1] is the first LS of a new PS column, line[2] is the column index
            self.PSColumnChanges.append([int(line[1]), int(line[2])])
        return self.PSColumnChanges

    def ParseTriggerModePage(self):
        """Read per-column prescales and HLT seeds from the trigger mode table.

        Returns self.TriggerInfo = [L1TriggerMode, HLTTriggerMode, HLTSeed].
        HLT names have their numeric version removed ('..._v3' -> '..._v').
        The seed is the last cell when it starts with 'L1_' or contains ' OR';
        otherwise the seed is "NULL" and the last cell is treated as one more
        prescale value when it is an integer.
        """
        for line in self.table:
            if len(line) < 6 or line[0].startswith('n'):
                continue
            if len(line) > 11:
                print(line)
            if line[1].startswith('L1_'):
                # Every cell from position 2 on is a prescale value.
                self.L1TriggerMode[line[1]] = [int(cell) for cell in line[2:]]

            if line[1].startswith('HLT_'):
                HLTStringName = line[1]
                version = None
                for piece in HLTStringName.split("_v"):  # Eliminates version number from the string name
                    if piece.isdigit():
                        version = piece
                if version is not None:
                    HLTStringName = HLTStringName.replace('_v' + version, '_v')

                # The parser counts the number in parentheses after the trigger
                # name as its own column, hence the start at position 3; the
                # last cell is handled separately below.
                prescales = [int(cell) for cell in line[3:len(line) - 1]]
                self.HLTTriggerMode[HLTStringName] = prescales

                last = line[len(line) - 1]
                if last.startswith('L1_'):
                    self.HLTSeed[HLTStringName] = last
                elif " OR" in last:
                    self.HLTSeed[HLTStringName] = str(last)
                else:
                    # No seed cell: the last cell is itself a prescale value.
                    try:
                        prescales.append(int(last))
                    except ValueError:
                        pass  # not a number either; nothing more to record
                    self.HLTSeed[HLTStringName] = "NULL"

        self.TriggerInfo = [self.L1TriggerMode, self.HLTTriggerMode, self.HLTSeed]
        return self.TriggerInfo

    def _FillPrescales(self, TriggerModes, Prescales):
        """Fill Prescales[key][LS] from TriggerModes[key][prescale column of LS].

        Keys with at least one LS that cannot be resolved are appended once
        to self.MissingPrescale.
        """
        if self.LSByLS:
            lumiSections = range(min(self.LSByLS), max(self.LSByLS) + 1)  # "range" excludes the last element
        else:
            lumiSections = []
        for key in TriggerModes:
            Prescales[key] = {}
            reported = False
            for n in lumiSections:
                try:
                    Prescales[key][n] = TriggerModes[key][self.PSColumnByLS[n]]
                except (KeyError, IndexError):
                    # KeyError: LS n was not read; IndexError: no such column.
                    if not reported:
                        self.MissingPrescale.append(key)
                        reported = True
                    if not n < 2:
                        print("LS "+str(n)+" of key "+str(key)+" is missing from the LumiSections page")

    def AssemblePrescaleValues(self):  ##Depends on output from ParseLumiPage and ParseTriggerModePage
        """Build per-LS prescales for every L1 and HLT path.

        Returns self.PrescaleValues = [L1Prescale, HLTPrescale, MissingPrescale].
        """
        self._FillPrescales(self.L1TriggerMode, self.L1Prescale)
        self._FillPrescales(self.HLTTriggerMode, self.HLTPrescale)

        self.PrescaleValues = [self.L1Prescale, self.HLTPrescale, self.MissingPrescale]
        return self.PrescaleValues

    def ComputeTotalPrescales(self, StartLS, EndLS):
        """Compute LS-averaged prescales for every HLT path over [StartLS, EndLS].

        Returns self.TotalPSInfo =
        [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale, n1, n2, L1, L2, H1, H2],
        each a dict keyed by HLT path.  Paths whose seed has no L1 prescale at
        StartLS are flagged in L1_zero and skipped; paths with a prescale
        below 0.1 anywhere in the range are flagged and get no averaged values.
        n1 counts the LS that use the prescale column of StartLS, n2 the rest.
        """
        IdealHLTPrescale = {}
        IdealPrescale = {}
        L1_zero = {}
        HLT_zero = {}
        n1 = {}
        n2 = {}
        L1 = {}
        L2 = {}
        H1 = {}
        H2 = {}
        InitialColumnIndex = self.PSColumnByLS[int(StartLS)]
        nLS = EndLS + 1 - StartLS

        for key in self.HLTTriggerMode:
            try:
                # Does this path have a valid L1 seed with a prescale?
                seedPrescale = self.L1Prescale[self.HLTSeed[key]]
                seedPrescale[StartLS]
            except KeyError:
                L1_zero[key] = True
                HLT_zero[key] = False
                continue
            pathPrescale = self.HLTPrescale[key]

            IdealHLTPrescale[key] = 0.0
            IdealPrescale[key] = 0.0
            n1[key] = 0
            L1_zero[key] = False
            HLT_zero[key] = False

            for ls in range(StartLS, EndLS + 1):  # "range" excludes the last element
                l1ps = seedPrescale[ls]
                hltps = pathPrescale[ls]
                if l1ps > 0 and hltps > 0:
                    IdealPrescale[key] += 1.0 / (l1ps * hltps)
                else:
                    IdealPrescale[key] += 1.0  ##To prevent a divide by 0 error later
                if l1ps < 0.1:
                    L1_zero[key] = True
                if hltps < 0.1:
                    HLT_zero[key] = True
                if self.PSColumnByLS[ls] == InitialColumnIndex:
                    n1[key] += 1

            if L1_zero[key] or HLT_zero[key]:
                continue

            IdealPrescale[key] = nLS / IdealPrescale[key]

            n2[key] = float(nLS - n1[key])
            L1[key] = float(seedPrescale[StartLS])
            L2[key] = float(seedPrescale[EndLS])
            H1[key] = float(pathPrescale[StartLS])
            H2[key] = float(pathPrescale[EndLS])

            IdealHLTPrescale[key] = ((n1[key] / L1[key]) + (n2[key] / L2[key])) / \
                ((n1[key] / (L1[key] * H1[key])) + (n2[key] / (L2[key] * H2[key])))

        self.TotalPSInfo = [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale,
                            n1, n2, L1, L2, H1, H2]

        return self.TotalPSInfo

    def CorrectForPrescaleChange(self, StartLS, EndLS):
        """Estimate the real prescale of each path when a prescale change is suspected.

        Requires ComputeTotalPrescales() and ParseHLTSummaryPage() to have run.
        The StartLS/EndLS arguments are replaced, per trigger, by the values
        stored in self.TriggerRates.  Paths flagged in L1_zero/HLT_zero and
        paths absent from self.TriggerRates are skipped.  When the measured PS
        is within 5% of the ideal HLT prescale the ideal total prescale is
        used as is.  The L1/L2/H1/H2 dicts are shared with self.TotalPSInfo
        and may be updated in place.

        Returns self.CorrectedPSInfo = [RealPrescale, xLS, L1, L2, H1, H2].
        """
        [L1_zero, HLT_zero, IdealPrescale, IdealHLTPrescale,
         n1, n2, L1, L2, H1, H2] = self.TotalPSInfo
        xLS = {}
        RealPrescale = {}

        for key in self.HLTTriggerMode:
            if L1_zero[key] or HLT_zero[key]:
                continue
            if key not in self.TriggerRates:
                continue  # no measured rate for this path, nothing to correct
            [TriggerRate, L1Pass, PSPass, PS, Seed, StartLS, EndLS] = self.TriggerRates[key]
            if PS > 0.95 * IdealHLTPrescale[key] and PS < 1.05 * IdealHLTPrescale[key]:
                RealPrescale[key] = IdealPrescale[key]
                continue

            seedPrescale = self.L1Prescale[self.HLTSeed[key]]
            pathPrescale = self.HLTPrescale[key]
            if H1[key] == H2[key] and L1[key] == L2[key] and not EndLS > max(self.LSByLS) - 1:  ##Look for prescale change into the next LS
                H2[key] = float(pathPrescale[EndLS + 1])
                L2[key] = float(seedPrescale[EndLS + 1])
            if H1[key] == H2[key] and L1[key] == L2[key] and not StartLS < 3:
                # ...and otherwise for a change in the LS just before the range.
                H1[key] = float(pathPrescale[StartLS - 1])
                L1[key] = float(seedPrescale[StartLS - 1])

            if H1[key] == H2[key]:
                xLS[key] = 0
            else:
                # NOTE(review): closed-form estimate of how many LS the change
                # is shifted by; the derivation is not shown in this file.
                ratio = PS / IdealHLTPrescale[key]
                l1Mix = L2[key] * n1[key] + L1[key] * n2[key]
                totMix = H2[key] * L2[key] * n1[key] + H1[key] * L1[key] * n2[key]
                numerator = (-ratio * l1Mix * totMix) + (totMix * l1Mix)
                denominator = (ratio * l1Mix * (H1[key] * L1[key] - H2[key] * L2[key])) + \
                    (totMix * (L2[key] - L1[key]))
                xLS[key] = numerator / denominator

            # The shift is limited to one LS in either direction.
            if xLS[key] > 1:
                xLS[key] = 1
            if xLS[key] < -1:
                xLS[key] = -1
            RealPrescale[key] = (n1[key] + n2[key]) / \
                (((n1[key] - xLS[key]) / (H1[key] * L1[key])) + (n2[key] + xLS[key]) / (H2[key] * L2[key]))

        self.CorrectedPSInfo = [RealPrescale, xLS, L1, L2, H1, H2]

        return self.CorrectedPSInfo

    def Save(self, fileName):
        """Pickle this parser to fileName, creating its directory if needed."""
        directory = os.path.dirname(fileName)
        # dirname is '' for a bare file name; os.makedirs('') would fail.
        if directory and not os.path.exists(directory):
            os.makedirs(directory)
        with open(fileName, 'wb') as handle:
            pickle.dump(self, handle)

    def Load(self, fileName):
        """Restore the state saved by Save() into this instance and return self.

        NOTE: unpickling can execute arbitrary code; only load files that
        were written by Save() from a trusted location.

        Raises TypeError if the file does not contain an object with
        instance attributes.
        """
        with open(fileName, 'rb') as handle:
            loaded = pickle.load(handle)
        state = getattr(loaded, '__dict__', None)
        if state is None:
            raise TypeError("%r does not contain a saved parser" % (fileName,))
        # Rebinding 'self' would not affect the caller's object, so the saved
        # attributes are copied onto this instance instead.
        self.__dict__.update(state)
        return self