UserCode/RateMonShiftTool_dev/AndrewWBMParser.py

from HTMLParser import HTMLParser
from urllib2 import urlopen
import cPickle as pickle
import os, sys
import time
import re

### Module-level patterns used by the parse_starttag / check_for_whole_start_tag
### overrides in AndrewWBMParser below.

# Matches a start tag from '<' up to, but not including, its closing '>' or
# '/>': tag name, then any number of attributes (each with an optional
# "= value"), then trailing whitespace.  Besides quoted and bare values it
# accepts one extra unquoted form, this.src='...' (the alternative marked
# "hack"); note the '.' in that alternative is unescaped, so it matches any
# single character.
# NOTE(review): appears to be modelled on the pattern of the same name in the
# stdlib HTMLParser module, with the "hack" alternative added -- confirm.
locatestarttagend = re.compile(r"""
        <[a-zA-Z][-.a-zA-Z0-9:_]*          # tag name
        (?:\s+                             # whitespace before attribute name
        (?:[a-zA-Z_][-.:a-zA-Z0-9_]*     # attribute name
        (?:\s*=\s*                     # value indicator
        (?:'[^']*'                   # LITA-enclosed value
        |\"[^\"]*\"                # LIT-enclosed value
        |this.src='[^']*'          # hack
        |[^'\">\s]+                # bare value
        )
        )?
        )
        )*
        \s*                                # trailing whitespace
        """, re.VERBOSE)

# Matches a tag name; parse_starttag applies it just after the opening '<'.
tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
# Matches one attribute.  Group 1 is the attribute name, group 2 the optional
# "= value" part, group 3 the value itself (single-quoted, double-quoted or
# bare; quotes are still included in the group).
attrfind = re.compile(
    r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
    r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')

class AndrewWBMParser(HTMLParser):
    
    def __init__(self):
        HTMLParser.__init__(self)
        self.InRow=0
        self.InEntry=0
        self.table =  []
        self.tmpRow = []
        self.hyperlinks = []

        ##-- Defined in ParsePage1 --##
        self.RunNumber = 0

        ##-- Defined in ParseRunPage --##
        self.RatePage = ''
        self.LumiPage = ''
        self.L1Page=''
        self.PrescaleChangesPage=''
        self.TriggerModePage=''
        self.Date=''
        self.HLT_Key=''

        ##-- Defined in ParseHLTSummaryPage --##
        self.TriggerRates = {}

        ##-- Defined in ParseLumiPage --##
        self.LSByLS = []
        self.InstLumiByLS = {}
        self.DeliveredLumiByLS = {}
        self.LiveLumiByLS = {}
        self.PSColumnByLS = {}
        self.PrescaleColumnString=''
        self.AvInstLumi = 0
        self.AvDeliveredLumi = 0
        self.AvLiveLumi = 0
        self.LumiInfo = []  ##Returns

        ##-- Defined in ParseL1Page (not currently used) --##
        self.L1Rates={}  ##Returns

        ##-- Defined in ParsePSColumnPage (not currently used) --##
        self.PSColumnChanges=[]  ##Returns

        ##-- Defined in ParseTriggerModePage --##
        self.L1TriggerMode={}
        self.HLTTriggerMode={}
        self.HLTSeed={}
        self.TriggerInfo = []  ##Returns

        ##-- Defined in AssemblePrescaleValues --##
        self.L1Prescale={}
        self.HLTPrescale={}
        self.MissingPrescale=[]
        self.PrescaleValues=[]  ##Returns

        ##-- Defined in ComputeTotalPrescales --##
        self.TotalPSInfo = []  ##Returns

        ##-- Defined in CorrectForPrescaleChange --##
        self.CorrectedPSInfo = []  ##Returns

        ##-- In the current Parser.py philosophy, only RunNumber is set globally
        ##    - LS range is set from the outside for each individual function
        #self.FirstLS = -1
        #self.LastLS = -1


    def parse_starttag(self, i):   ## Overwrite function from HTMLParser
        """Parse the start tag beginning at index *i* of ``self.rawdata``.

        Uses the module-level ``tagfind`` / ``attrfind`` patterns and this
        class's ``check_for_whole_start_tag``.  ``<img>`` tags are skipped
        outright: their attributes are not parsed and no handler is called.
        For every other tag the attributes are collected as (lower-cased
        name, value) pairs and passed to ``handle_startendtag`` (tag text
        ends in '/>') or ``handle_starttag``.

        Returns the index just past the tag, or the negative value reported
        by ``check_for_whole_start_tag`` when the tag is not complete.
        """
        # NOTE(review): inside this class the double-underscore name is
        # mangled to _AndrewWBMParser__starttag_text, so it is a different
        # attribute from the one the base class keeps under its own mangled
        # name; presumably the inherited get_starttag_text() therefore does
        # not see this value -- confirm.
        self.__starttag_text = None
        endpos = self.check_for_whole_start_tag(i)
        if endpos < 0:
            return endpos
        rawdata = self.rawdata
        self.__starttag_text = rawdata[i:endpos]

        # Now parse the data between i+1 and j into a tag and attrs
        attrs = []
        match = tagfind.match(rawdata, i+1)
        assert match, 'unexpected call to parse_starttag()'
        k = match.end()
        self.lasttag = tag = rawdata[i+1:k].lower()

        # Image tags are ignored completely (lasttag has already been set).
        if tag == 'img':
            return endpos

        while k < endpos:
            m = attrfind.match(rawdata, k)
            if not m:
                break
            attrname, rest, attrvalue = m.group(1, 2, 3)
            if not rest:
                # Attribute given without "= value".
                attrvalue = None
            elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
                 attrvalue[:1] == '"' == attrvalue[-1:]:
                # Quoted value: strip the quotes and decode entities.  Bare
                # (unquoted) values are passed through unchanged.
                attrvalue = attrvalue[1:-1]
                attrvalue = self.unescape(attrvalue)
            attrs.append((attrname.lower(), attrvalue))
            k = m.end()

        end = rawdata[k:endpos].strip()
        if end not in (">", "/>"):
            # NOTE(review): lineno and offset are computed here but never
            # used afterwards; only the error() call has an effect.
            lineno, offset = self.getpos()
            if "\n" in self.__starttag_text:
                lineno = lineno + self.__starttag_text.count("\n")
                offset = len(self.__starttag_text) \
                         - self.__starttag_text.rfind("\n")
            else:
                offset = offset + len(self.__starttag_text)
            self.error("junk characters in start tag: %r"
                       % (rawdata[k:endpos][:20],))
        if end.endswith('/>'):
            # XHTML-style empty tag: <span attr="value" />
            self.handle_startendtag(tag, attrs)
        else:
            self.handle_starttag(tag, attrs)
            if tag in self.CDATA_CONTENT_ELEMENTS:
                self.set_cdata_mode()
        return endpos

    def check_for_whole_start_tag(self, i):
        """Locate the end of the start tag beginning at index *i*.

        Matches the module-level ``locatestarttagend`` pattern at *i* and
        inspects the character that follows the match.

        Returns the index just past the closing '>' or '/>', or -1 when the
        buffered data ends before the tag is complete.  Malformed input is
        reported through ``self.error``.  Raises AssertionError when the
        pattern does not match at *i*, or when ``self.error`` returns
        normally instead of raising.
        """
        rawdata = self.rawdata
        m = locatestarttagend.match(rawdata, i)
        if m:
            j = m.end()
            # Single character after the matched tag text ('' at end of
            # buffer).  Note: the local name shadows the builtin next().
            next = rawdata[j:j+1]
            #print next
            #if next == "'":
            #    j = rawdata.find(".jpg'",j)
            #    j = rawdata.find(".jpg'",j+1)
            #    next = rawdata[j:j+1]
            if next == ">":
                return j + 1
            if next == "/":
                if rawdata.startswith("/>", j):
                    return j + 2
                if rawdata.startswith("/", j):
                    # buffer boundary
                    return -1
                # else bogus input
                # (not reachable: next == "/" already implies the
                # startswith("/", j) test above succeeded)
                self.updatepos(i, j + 1)
                self.error("malformed empty start tag")
            if next == "":
                # end of input
                return -1
            if next in ("abcdefghijklmnopqrstuvwxyz=/"
                        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
                # end of input in or before attribute value, or we have the
                # '/' from a '/>' ending
                return -1
            self.updatepos(i, j)
            self.error("malformed start tag")
        raise AssertionError("we should not get here!")

    def _Parse(self,url):
        """Download *url* and feed the page through this parser.

        ``self.table`` and ``self.hyperlinks`` are emptied before the page is
        fed, so the handle_* callbacks start collecting into fresh lists.

        Nothing is caught here: errors from opening or reading the URL, and
        errors raised while parsing, propagate to the caller.
        """
        self.table = []
        self.hyperlinks = []
        req = urlopen(url)
        try:
            self.feed(req.read())
        finally:
            # Always release the HTTP connection, including when read() or
            # feed() raises; previously the response was never closed.
            req.close()

    def handle_starttag(self,tag,attrs):
        if tag == 'a' and attrs:
            self.hyperlinks.append(attrs[0][1])
                
        if tag == 'tr':
            self.InRow=1
        if tag == 'td':
            self.InEntry=1

    def handle_endtag(self,tag):
        if tag =='tr':
            if self.InRow==1:
                self.InRow=0
                self.table.append(self.tmpRow)
                self.tmpRow=[]
        if tag == 'td':
            self.InEntry=0

    def handle_startendtag(self,tag, attrs):
        """Do nothing for self-closing tags (tag text ending in '/>')."""
        return None

    def handle_data(self,data):
        if self.InEntry:
            self.tmpRow.append(data)

    def ParsePage1(self):   ## Parse the Run list page to figure out what the most recent run was
        try:
            # Find the first non-empty row on page one
            MostRecent = self.table[0]
            print MostRecent
            for line in self.table:
                if line == []:
                    continue # skip empty rows, not exactly sure why they show up
                MostRecent = line
                break # find first non-empty line
            TriggerMode = MostRecent[3]
            self.RunNumber = MostRecent[0]    ## Set the run number

        
            isCollisions = not (TriggerMode.find('l1_hlt_collisions') == -1)  ## Is the most recent run a collisions run?
            if not isCollisions:
                return ''
            for link in self.hyperlinks:
                if not link.find('RUN='+self.RunNumber)==-1:
                    self.RunPage = link   ## Get the link to the run summary page and return
                    return link
        except:
            print 'No Recent run'
            return ''
        
    def ParseRunPage(self):
        for entry in self.hyperlinks:

            entry = entry.replace('../../','http://cmswbm/')
            if not entry.find('HLTSummary') == -1:
                self.RatePage = entry
            if not entry.find('L1Summary') == -1:
                self.L1Page = entry
            if not entry.find('LumiSections') == -1:
                self.LumiPage = "http://cmswbm/cmsdb/servlet/"+entry
            if not entry.find('PrescaleChanges') == -1:
                self.PrescaleChangesPage = "http://cmswbm/cmsdb/servlet/"+entry
            if not entry.find('TriggerMode') == -1:
                self.TriggerModePage = entry
            #print self.table
            self.HLT_Key = self.table[8][0]
            #print self.HLT_Key
            self.Date = self.table[1][4]
            #print self.Date
            
        return [self.RatePage,self.LumiPage,self.L1Page,self.PrescaleChangesPage,self.TriggerModePage]


    def ParseHLTSummaryPage(self,StartLS,EndLS):

        for line in self.table:
            if not len(line)>6:  # All relevant lines in the table will be at least this long
                continue
            if line[1].startswith('HLT_'):
                TriggerName = line[1][:line[1].find('_v')+2] # Format is HLT_... (####), this gets rid of the (####)
                TriggerRate = float(line[6].replace(',','')) # Need to remove the ","s, since float() can't parse them
                L1Pass = int(line[3])
                PSPass = int(line[4])
                Seed = line[9]
                if int(line[4])>0: #line[3] is L1Pass, line[4] is PSPass
                    PS = float(line[3])/float(line[4])
                else:
                    if int(line[3])>0:
                        PS = line[3]
                    else:
                        PS = 1
                self.TriggerRates[TriggerName] = [TriggerRate,L1Pass,PSPass,PS,Seed,StartLS,EndLS]

        return self.TriggerRates

        
    def ParseLumiPage(self,StartLS,EndLS):

        for line in self.table:
            if len(line)<2 or len(line)>13:
                continue
            if float(line[8]) < 10 or float(line[9]) < 1: ##Beam 1 or Beam 2 absent
                continue

            self.LSByLS.append(int(line[0])) #LumiSection number is in position 0
            self.PSColumnByLS[int(line[0])] = int(line[2]) #Prescale column is in position 2            
            self.InstLumiByLS[int(line[0])] = round(float(line[4]),2) #Instantaneous luminosity (delivered?) is in position 4
            self.LiveLumiByLS[int(line[0])] = round(float(line[6]),2)  # Live lumi is in position 6
            self.DeliveredLumiByLS[int(line[0])] = round(float(line[5]),2) #Delivered lumi is in position 5

        if StartLS < 0:
            EndLS = max(self.LSByLS) - 3
            StartLS = EndLS + StartLS
        if StartLS < 2: #The parser does not parse the first LS
            StartLS = 2
        if StartLS == 999999:
            StartLS = min(self.LSByLS)
        if EndLS == 111111:
            EndLS = max(self.LSByLS)
        if EndLS <= StartLS:
            print "In ParseLumiPage, EndLS <= StartLS"

        #print "In ParseLumiPage, StartLS = "+str(StartLS)+" and EndLS = "+str(EndLS)

        self.AvLiveLumi = 1000*(self.LiveLumiByLS[EndLS] - self.LiveLumiByLS[StartLS])/(23.3*(EndLS-StartLS))
        self.AvDeliveredLumi = 1000*(self.DeliveredLumiByLS[EndLS] - self.DeliveredLumiByLS[StartLS])/(23.3*(EndLS-StartLS))
        value_iterator = 0
        for value in self.LSByLS:
            if value >= StartLS and value <= EndLS:
                self.AvInstLumi+=self.InstLumiByLS[value]
                value_iterator+=1
        self.AvInstLumi = self.AvInstLumi / value_iterator

        ### PS column info

        self.LumiInfo = [self.LSByLS, self.PSColumnByLS, self.InstLumiByLS, self.DeliveredLumiByLS, self.LiveLumiByLS, self.AvInstLumi, self.AvDeliveredLumi, self.AvLiveLumi]
            
        return [self.LumiInfo,StartLS,EndLS]
    

    def ParseL1Page(self): ##Not used for anything - get this information with ParseTriggerModePage
        for line in self.table:
            if len(line) < 10:
                continue
            if line[1].startswith('L1_'):
                try:
                    self.L1Rates[line[1]] = float(line[len(line)-4])
                except:
                    correctedNumber = line[len(line)-4].replace(",","")
                    self.L1Rates[line[1]] = float(correctedNumber)
                    
        return self.L1Rates

    def ParsePSColumnPage(self):
        for line in self.table:
            if len(line) < 5 or line[0].startswith('Run'):
                continue
            self.PSColumnChanges.append([int(line[1]),int(line[2])]) #line[1] is the first LS of a new PS column, line[2] is the column index
        return self.PSColumnChanges

    def ParseTriggerModePage(self):
        for line in self.table:
            if len(line) < 6 or line[0].startswith('n'):
                continue
            if len(line) > 11:
                print line
            if line[1].startswith('L1_'):
                self.L1TriggerMode[line[1]] = []
                for n in range(2, len(line)): #"range" does not include the last element (i.e. there is no n = len(line))
                    self.L1TriggerMode[line[1]].append(int(line[n]))
                    
            if line[1].startswith('HLT_'):
                HLTStringName = line[1]
                for s in HLTStringName.split("_v"): #Eliminates version number from the string name
                    if s.isdigit():
                        numbertoreplace = s
                HLTStringName = HLTStringName.replace('_v'+str(numbertoreplace),'_v')
                
                self.HLTTriggerMode[HLTStringName] = []

                for n in range(3, len(line)-1): #The parser counts the number in parentheses after the trigger name as its own column
                    self.HLTTriggerMode[HLTStringName].append(int(line[n]))
                        
                if line[len(line)-1].startswith('L1_'):
                    self.HLTSeed[HLTStringName] = line[len(line)-1]
                else:
                    if not " OR" in line[len(line)-1]:
                        self.HLTTriggerMode[HLTStringName].append(int(line[n]))
                        self.HLTSeed[HLTStringName] = "NULL"
                    else:
                        self.HLTSeed[HLTStringName] = str(line[len(line)-1])

        self.TriggerInfo = [self.L1TriggerMode,self.HLTTriggerMode,self.HLTSeed]
        return self.TriggerInfo

    def AssemblePrescaleValues(self): ##Depends on output from ParseLumiPage and ParseTriggerModePage
        MissingName = "Nemo"
        for key in self.L1TriggerMode:
            self.L1Prescale[key] = {}
            for n in range(min(self.LSByLS),max(self.LSByLS)+1): #"range()" excludes the last element
                try:
                    self.L1Prescale[key][n] = self.L1TriggerMode[key][self.PSColumnByLS[n]]
                except:
                    if not key == MissingName:
                        self.MissingPrescale.append(key)
                        MissingName = key
                    if not n < 2:
                        print "LS "+str(n)+" of key "+str(key)+" is missing from the LumiSections page"

        for key in self.HLTTriggerMode:
            self.HLTPrescale[key] = {}
            for n in range(min(self.LSByLS),max(self.LSByLS)+1): #"range" excludes the last element
                try:
                    self.HLTPrescale[key][n] = self.HLTTriggerMode[key][self.PSColumnByLS[n]]
                except:
                    if not key == MissingName:
                        self.MissingPrescale.append(key)
                        MissingName = key
                    if not n < 2:
                        print "LS "+str(n)+" of key "+str(key)+" is missing from the LumiSections page"

        self.PrescaleValues = [self.L1Prescale,self.HLTPrescale,self.MissingPrescale]
        return self.PrescaleValues

    def ComputeTotalPrescales(self,StartLS,EndLS):
        IdealHLTPrescale = {}
        IdealPrescale = {}
        L1_zero = {}
        HLT_zero = {}
        n1 = {}
        n2 = {}
        L1 = {}
        L2 = {}
        H1 = {}
        H2 = {}
        InitialColumnIndex = self.PSColumnByLS[int(StartLS)]

        for key in self.HLTTriggerMode:
            try:
                DoesThisPathHaveAValidL1SeedWithPrescale = self.L1Prescale[self.HLTSeed[key]][StartLS]
            except:
                L1_zero[key] = True
                HLT_zero[key] = False
                continue

            IdealHLTPrescale[key] = 0.0
            IdealPrescale[key] = 0.0
            n1[key] = 0
            L1_zero[key] = False
            HLT_zero[key] = False

            for LSIterator in range(StartLS,EndLS+1): #"range" excludes the last element
                if self.L1Prescale[self.HLTSeed[key]][LSIterator] > 0 and self.HLTPrescale[key][LSIterator] > 0:
                    IdealPrescale[key]+=1.0/(self.L1Prescale[self.HLTSeed[key]][LSIterator]*self.HLTPrescale[key][LSIterator])
                else:
                    IdealPrescale[key]+=1.0 ##To prevent a divide by 0 error later
                    if self.L1Prescale[self.HLTSeed[key]][LSIterator] < 0.1:
                        L1_zero[key] = True
                    if self.HLTPrescale[key][LSIterator] < 0.1:
                        HLT_zero[key] = True
                if self.PSColumnByLS[LSIterator] == InitialColumnIndex:
                    n1[key]+=1

            if L1_zero[key] == True or HLT_zero[key] == True:
                continue

            IdealPrescale[key] = (EndLS + 1 - StartLS)/IdealPrescale[key]

            n2[key] = float(EndLS + 1 - StartLS - n1[key])
            L1[key] = float(self.L1Prescale[self.HLTSeed[key]][StartLS])
            L2[key] = float(self.L1Prescale[self.HLTSeed[key]][EndLS])
            H1[key] = float(self.HLTPrescale[key][StartLS])
            H2[key] = float(self.HLTPrescale[key][EndLS])

            IdealHLTPrescale[key] = ((n1[key]/L1[key])+(n2[key]/L2[key]))/((n1[key]/(L1[key]*H1[key]))+(n2[key]/(L2[key]*H2[key])))

        self.TotalPSInfo = [L1_zero,HLT_zero,IdealPrescale,IdealHLTPrescale,n1,n2,L1,L2,H1,H2]

        return self.TotalPSInfo

        
    def CorrectForPrescaleChange(self,StartLS,EndLS):
        """Estimate the prescale actually in effect for each HLT path.

        Works on ``self.TotalPSInfo`` (as produced by ComputeTotalPrescales)
        and ``self.TriggerRates`` (as produced by ParseHLTSummaryPage).  Paths
        flagged in L1_zero or HLT_zero are skipped.  If the measured PS of a
        path is within 5% of its IdealHLTPrescale, its real prescale is taken
        to be IdealPrescale.  Otherwise a shift xLS, limited to [-1, 1], is
        computed and used to re-weight the n1 / n2 section counts.

        Returns (and stores in ``self.CorrectedPSInfo``) the list
        [RealPrescale, xLS, L1, L2, H1, H2] of dictionaries keyed by path.
        Raises KeyError for a path that is in ``self.HLTTriggerMode`` but not
        in ``self.TriggerRates``.
        """
        # L1, L2, H1 and H2 are the same dictionary objects that are stored
        # in self.TotalPSInfo, so the assignments below modify that too.
        [L1_zero,HLT_zero,IdealPrescale,IdealHLTPrescale,n1,n2,L1,L2,H1,H2] = self.TotalPSInfo
        xLS = {}
        RealPrescale = {}

        for key in self.HLTTriggerMode:
            if L1_zero[key] == True or HLT_zero[key] == True:
                continue
            # NOTE(review): this rebinds the StartLS and EndLS arguments to
            # the values stored with the trigger's rates, so the arguments
            # passed in are not used from here on -- confirm this is intended.
            [TriggerRate,L1Pass,PSPass,PS,Seed,StartLS,EndLS] = self.TriggerRates[key]
            # Measured prescale agrees with the expectation to within 5%.
            if PS > 0.95 * IdealHLTPrescale[key] and PS < 1.05 * IdealHLTPrescale[key]:
                RealPrescale[key] = IdealPrescale[key]
                continue
                
            if H1[key] == H2[key] and L1[key] == L2[key] and not EndLS > max(self.LSByLS) - 1: ##Look for prescale change into the next LS
                H2[key] = float(self.HLTPrescale[key][EndLS+1])
                L2[key] = float(self.L1Prescale[self.HLTSeed[key]][EndLS+1])
            # Still no difference: look at the section before the range.
            if H1[key] == H2[key] and L1[key] == L2[key] and not StartLS < 3:
                H1[key] = float(self.HLTPrescale[key][StartLS-1])
                L1[key] = float(self.L1Prescale[self.HLTSeed[key]][StartLS-1])
            if H1[key] == H2[key]:
                xLS[key] = 0
            else:
                xLS[key] = ((-(PS/IdealHLTPrescale[key])*(L2[key]*n1[key]+L1[key]*n2[key])*(H2[key]*L2[key]*n1[key]+H1[key]*L1[key]*n2[key]))+((H2[key]*L2[key]*n1[key]+H1[key]*L1[key]*n2[key])*(L2[key]*n1[key]+L1[key]*n2[key])))/(((PS/IdealHLTPrescale[key])*(L2[key]*n1[key]+L1[key]*n2[key])*(H1[key]*L1[key]-H2[key]*L2[key]))+((H2[key]*L2[key]*n1[key]+H1[key]*L1[key]*n2[key])*(L2[key]-L1[key])))

            # Limit the shift to one lumi section in either direction.
            if xLS[key] > 1:
                xLS[key] = 1
            if xLS[key] < -1:
                xLS[key] = -1
            RealPrescale[key] = (n1[key] + n2[key])/(((n1[key] - xLS[key])/(H1[key]*L1[key]))+(n2[key]+xLS[key])/(H2[key]*L2[key]))

        self.CorrectedPSInfo = [RealPrescale,xLS,L1,L2,H1,H2]

        return self.CorrectedPSInfo
        
    def Save(self, fileName):
        dir = os.path.dirname(fileName)    
        if not os.path.exists(dir):
            os.makedirs(dir)
        pickle.dump( self, open( fileName, 'w' ) )

    def Load(self, fileName):
        self = pickle.load( open( fileName ) )
Revision:	1.3
Committed:	Wed Oct 31 16:26:50 2012 UTC (12 years, 5 months ago) by awoodard
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	HEAD
Changes since 1.2:	+0 -0 lines
State:	*FILE REMOVED*
Log Message:	depreciated
#	Content
1	from HTMLParser import HTMLParser
2	from urllib2 import urlopen
3	import cPickle as pickle
4	import os, sys
5	import time
6	import re
7
8	### need to overwrite some functions in the HTMLParser library
9	locatestarttagend = re.compile(r"""
10	<[a-zA-Z][-.a-zA-Z0-9:_]* # tag name
11	(?:\s+ # whitespace before attribute name
12	(?:[a-zA-Z_][-.:a-zA-Z0-9_]* # attribute name
13	(?:\s*=\s* # value indicator
14	(?:'[^']*' # LITA-enclosed value
15	|\"[^\"]*\" # LIT-enclosed value
16	|this.src='[^']*' # hack
17	|[^'\">\s]+ # bare value
18	)
19	)?
20	)
21	)*
22	\s* # trailing whitespace
23	""", re.VERBOSE)
24
25	tagfind = re.compile('[a-zA-Z][-.a-zA-Z0-9:_]*')
26	attrfind = re.compile(
27	r'\s*([a-zA-Z_][-.:a-zA-Z_0-9]*)(\s*=\s*'
28	r'(\'[^\']*\'|"[^"]*"|[-a-zA-Z0-9./,:;+*%?!&$\(\)_#=~@]*))?')
29
30	class AndrewWBMParser(HTMLParser):
31
32	def __init__(self):
33	HTMLParser.__init__(self)
34	self.InRow=0
35	self.InEntry=0
36	self.table = []
37	self.tmpRow = []
38	self.hyperlinks = []
39
40	##-- Defined in ParsePage1 --##
41	self.RunNumber = 0
42
43	##-- Defined in ParseRunPage --##
44	self.RatePage = ''
45	self.LumiPage = ''
46	self.L1Page=''
47	self.PrescaleChangesPage=''
48	self.TriggerModePage=''
49	self.Date=''
50	self.HLT_Key=''
51
52	##-- Defined in ParseHLTSummaryPage --##
53	self.TriggerRates = {}
54
55	##-- Defined in ParseLumiPage --##
56	self.LSByLS = []
57	self.InstLumiByLS = {}
58	self.DeliveredLumiByLS = {}
59	self.LiveLumiByLS = {}
60	self.PSColumnByLS = {}
61	self.PrescaleColumnString=''
62	self.AvInstLumi = 0
63	self.AvDeliveredLumi = 0
64	self.AvLiveLumi = 0
65	self.LumiInfo = [] ##Returns
66
67	##-- Defined in ParseL1Page (not currently used) --##
68	self.L1Rates={} ##Returns
69
70	##-- Defined in ParsePSColumnPage (not currently used) --##
71	self.PSColumnChanges=[] ##Returns
72
73	##-- Defined in ParseTriggerModePage --##
74	self.L1TriggerMode={}
75	self.HLTTriggerMode={}
76	self.HLTSeed={}
77	self.TriggerInfo = [] ##Returns
78
79	##-- Defined in AssemblePrescaleValues --##
80	self.L1Prescale={}
81	self.HLTPrescale={}
82	self.MissingPrescale=[]
83	self.PrescaleValues=[] ##Returns
84
85	##-- Defined in ComputeTotalPrescales --##
86	self.TotalPSInfo = [] ##Returns
87
88	##-- Defined in CorrectForPrescaleChange --##
89	self.CorrectedPSInfo = [] ##Returns
90
91	##-- In the current Parser.py philosophy, only RunNumber is set globally
92	## - LS range is set from the outside for each individual function
93	#self.FirstLS = -1
94	#self.LastLS = -1
95
96
97	def parse_starttag(self, i): ## Overwrite function from HTMLParser
98	self.__starttag_text = None
99	endpos = self.check_for_whole_start_tag(i)
100	if endpos < 0:
101	return endpos
102	rawdata = self.rawdata
103	self.__starttag_text = rawdata[i:endpos]
104
105	# Now parse the data between i+1 and j into a tag and attrs
106	attrs = []
107	match = tagfind.match(rawdata, i+1)
108	assert match, 'unexpected call to parse_starttag()'
109	k = match.end()
110	self.lasttag = tag = rawdata[i+1:k].lower()
111
112	if tag == 'img':
113	return endpos
114
115	while k < endpos:
116	m = attrfind.match(rawdata, k)
117	if not m:
118	break
119	attrname, rest, attrvalue = m.group(1, 2, 3)
120	if not rest:
121	attrvalue = None
122	elif attrvalue[:1] == '\'' == attrvalue[-1:] or \
123	attrvalue[:1] == '"' == attrvalue[-1:]:
124	attrvalue = attrvalue[1:-1]
125	attrvalue = self.unescape(attrvalue)
126	attrs.append((attrname.lower(), attrvalue))
127	k = m.end()
128
129	end = rawdata[k:endpos].strip()
130	if end not in (">", "/>"):
131	lineno, offset = self.getpos()
132	if "\n" in self.__starttag_text:
133	lineno = lineno + self.__starttag_text.count("\n")
134	offset = len(self.__starttag_text) \
135	- self.__starttag_text.rfind("\n")
136	else:
137	offset = offset + len(self.__starttag_text)
138	self.error("junk characters in start tag: %r"
139	% (rawdata[k:endpos][:20],))
140	if end.endswith('/>'):
141	# XHTML-style empty tag: <span attr="value" />
142	self.handle_startendtag(tag, attrs)
143	else:
144	self.handle_starttag(tag, attrs)
145	if tag in self.CDATA_CONTENT_ELEMENTS:
146	self.set_cdata_mode()
147	return endpos
148
149	def check_for_whole_start_tag(self, i):
150	rawdata = self.rawdata
151	m = locatestarttagend.match(rawdata, i)
152	if m:
153	j = m.end()
154	next = rawdata[j:j+1]
155	#print next
156	#if next == "'":
157	# j = rawdata.find(".jpg'",j)
158	# j = rawdata.find(".jpg'",j+1)
159	# next = rawdata[j:j+1]
160	if next == ">":
161	return j + 1
162	if next == "/":
163	if rawdata.startswith("/>", j):
164	return j + 2
165	if rawdata.startswith("/", j):
166	# buffer boundary
167	return -1
168	# else bogus input
169	self.updatepos(i, j + 1)
170	self.error("malformed empty start tag")
171	if next == "":
172	# end of input
173	return -1
174	if next in ("abcdefghijklmnopqrstuvwxyz=/"
175	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"):
176	# end of input in or before attribute value, or we have the
177	# '/' from a '/>' ending
178	return -1
179	self.updatepos(i, j)
180	self.error("malformed start tag")
181	raise AssertionError("we should not get here!")
182
183	def _Parse(self,url):
184	#try:
185	#print self
186	#print url
187	self.table = []
188	self.hyperlinks = []
189	req = urlopen(url)
190	self.feed(req.read())
191
192	#except:
193	#print "Error Getting page: "+url
194	#print "Please retry. If problem persists, contact developer"
195
196	def handle_starttag(self,tag,attrs):
197	if tag == 'a' and attrs:
198	self.hyperlinks.append(attrs[0][1])
199
200	if tag == 'tr':
201	self.InRow=1
202	if tag == 'td':
203	self.InEntry=1
204
205	def handle_endtag(self,tag):
206	if tag =='tr':
207	if self.InRow==1:
208	self.InRow=0
209	self.table.append(self.tmpRow)
210	self.tmpRow=[]
211	if tag == 'td':
212	self.InEntry=0
213
214	def handle_startendtag(self,tag, attrs):
215	pass
216
217	def handle_data(self,data):
218	if self.InEntry:
219	self.tmpRow.append(data)
220
221	def ParsePage1(self): ## Parse the Run list page to figure out what the most recent run was
222	try:
223	# Find the first non-empty row on page one
224	MostRecent = self.table[0]
225	print MostRecent
226	for line in self.table:
227	if line == []:
228	continue # skip empty rows, not exactly sure why they show up
229	MostRecent = line
230	break # find first non-empty line
231	TriggerMode = MostRecent[3]
232	self.RunNumber = MostRecent[0] ## Set the run number
233
234
235
236	isCollisions = not (TriggerMode.find('l1_hlt_collisions') == -1) ## Is the most recent run a collisions run?
237	if not isCollisions:
238	return ''
239	for link in self.hyperlinks:
240	if not link.find('RUN='+self.RunNumber)==-1:
241	self.RunPage = link ## Get the link to the run summary page and return
242	return link
243	except:
244	print 'No Recent run'
245	return ''
246
247	def ParseRunPage(self):
248	for entry in self.hyperlinks:
249
250	entry = entry.replace('../../','http://cmswbm/')
251	if not entry.find('HLTSummary') == -1:
252	self.RatePage = entry
253	if not entry.find('L1Summary') == -1:
254	self.L1Page = entry
255	if not entry.find('LumiSections') == -1:
256	self.LumiPage = "http://cmswbm/cmsdb/servlet/"+entry
257	if not entry.find('PrescaleChanges') == -1:
258	self.PrescaleChangesPage = "http://cmswbm/cmsdb/servlet/"+entry
259	if not entry.find('TriggerMode') == -1:
260	self.TriggerModePage = entry
261	#print self.table
262	self.HLT_Key = self.table[8][0]
263	#print self.HLT_Key
264	self.Date = self.table[1][4]
265	#print self.Date
266
267	return [self.RatePage,self.LumiPage,self.L1Page,self.PrescaleChangesPage,self.TriggerModePage]
268
269
270	def ParseHLTSummaryPage(self,StartLS,EndLS):
271
272	for line in self.table:
273	if not len(line)>6: # All relevant lines in the table will be at least this long
274	continue
275	if line[1].startswith('HLT_'):
276	TriggerName = line[1][:line[1].find('_v')+2] # Format is HLT_... (####), this gets rid of the (####)
277	TriggerRate = float(line[6].replace(',','')) # Need to remove the ","s, since float() can't parse them
278	L1Pass = int(line[3])
279	PSPass = int(line[4])
280	Seed = line[9]
281	if int(line[4])>0: #line[3] is L1Pass, line[4] is PSPass
282	PS = float(line[3])/float(line[4])
283	else:
284	if int(line[3])>0:
285	PS = line[3]
286	else:
287	PS = 1
288	self.TriggerRates[TriggerName] = [TriggerRate,L1Pass,PSPass,PS,Seed,StartLS,EndLS]
289
290	return self.TriggerRates
291
292
293	def ParseLumiPage(self,StartLS,EndLS):
294
295	for line in self.table:
296	if len(line)<2 or len(line)>13:
297	continue
298	if float(line[8]) < 10 or float(line[9]) < 1: ##Beam 1 or Beam 2 absent
299	continue
300
301	self.LSByLS.append(int(line[0])) #LumiSection number is in position 0
302	self.PSColumnByLS[int(line[0])] = int(line[2]) #Prescale column is in position 2
303	self.InstLumiByLS[int(line[0])] = round(float(line[4]),2) #Instantaneous luminosity (delivered?) is in position 4
304	self.LiveLumiByLS[int(line[0])] = round(float(line[6]),2) # Live lumi is in position 6
305	self.DeliveredLumiByLS[int(line[0])] = round(float(line[5]),2) #Delivered lumi is in position 5
306
307	if StartLS < 0:
308	EndLS = max(self.LSByLS) - 3
309	StartLS = EndLS + StartLS
310	if StartLS < 2: #The parser does not parse the first LS
311	StartLS = 2
312	if StartLS == 999999:
313	StartLS = min(self.LSByLS)
314	if EndLS == 111111:
315	EndLS = max(self.LSByLS)
316	if EndLS <= StartLS:
317	print "In ParseLumiPage, EndLS <= StartLS"
318
319	#print "In ParseLumiPage, StartLS = "+str(StartLS)+" and EndLS = "+str(EndLS)
320
321	self.AvLiveLumi = 1000*(self.LiveLumiByLS[EndLS] - self.LiveLumiByLS[StartLS])/(23.3*(EndLS-StartLS))
322	self.AvDeliveredLumi = 1000*(self.DeliveredLumiByLS[EndLS] - self.DeliveredLumiByLS[StartLS])/(23.3*(EndLS-StartLS))
323	value_iterator = 0
324	for value in self.LSByLS:
325	if value >= StartLS and value <= EndLS:
326	self.AvInstLumi+=self.InstLumiByLS[value]
327	value_iterator+=1
328	self.AvInstLumi = self.AvInstLumi / value_iterator
329
330	### PS column info
331
332	self.LumiInfo = [self.LSByLS, self.PSColumnByLS, self.InstLumiByLS, self.DeliveredLumiByLS, self.LiveLumiByLS, self.AvInstLumi, self.AvDeliveredLumi, self.AvLiveLumi]
333
334	return [self.LumiInfo,StartLS,EndLS]
335
336
337	def ParseL1Page(self): ##Not used for anything - get this information with ParseTriggerModePage
338	for line in self.table:
339	if len(line) < 10:
340	continue
341	if line[1].startswith('L1_'):
342	try:
343	self.L1Rates[line[1]] = float(line[len(line)-4])
344	except:
345	correctedNumber = line[len(line)-4].replace(",","")
346	self.L1Rates[line[1]] = float(correctedNumber)
347
348	return self.L1Rates
349
350	def ParsePSColumnPage(self):
351	for line in self.table:
352	if len(line) < 5 or line[0].startswith('Run'):
353	continue
354	self.PSColumnChanges.append([int(line[1]),int(line[2])]) #line[1] is the first LS of a new PS column, line[2] is the column index
355	return self.PSColumnChanges
356
def ParseTriggerModePage(self):
    """Fill the L1/HLT trigger-mode tables and HLT seeds from self.table.

    Rows with fewer than 6 cells, or whose first cell starts with 'n', are
    skipped.  Rows with more than 11 cells are printed before being
    processed.  The second cell of a row is the trigger name:

    * 'L1_...' rows store every cell from the third onward, as ints, in
      self.L1TriggerMode[name].
    * 'HLT_...' rows have a numeric '_v<N>' version suffix reduced to '_v'.
      The cells from the fourth up to (but excluding) the last are stored
      as ints in self.HLTTriggerMode[name].  The last cell becomes the
      entry in self.HLTSeed when it starts with 'L1_' or contains ' OR';
      otherwise the seed is recorded as "NULL".

    Returns:
        self.TriggerInfo = [self.L1TriggerMode, self.HLTTriggerMode,
        self.HLTSeed].
    """
    for line in self.table:
        if len(line) < 6 or line[0].startswith('n'):
            continue
        if len(line) > 11:
            # Single-argument form prints the same under the Python 2
            # print statement.
            print(line)

        if line[1].startswith('L1_'):
            self.L1TriggerMode[line[1]] = [int(entry) for entry in line[2:]]

        if line[1].startswith('HLT_'):
            HLTStringName = line[1]
            # Reset per row: without this, a path lacking a numeric version
            # either hit an unbound name (first HLT row) or reused the
            # version number found on an earlier row.
            numbertoreplace = None
            for s in HLTStringName.split("_v"): #Eliminates version number from the string name
                if s.isdigit():
                    numbertoreplace = s
            if numbertoreplace is not None:
                HLTStringName = HLTStringName.replace('_v'+numbertoreplace,'_v')

            #The parser counts the number in parentheses after the trigger name as its own column
            self.HLTTriggerMode[HLTStringName] = [int(entry) for entry in line[3:-1]]

            lastEntry = line[-1]
            if lastEntry.startswith('L1_'):
                self.HLTSeed[HLTStringName] = lastEntry
            elif " OR" not in lastEntry:
                # NOTE(review): the original indexed with the leftover loop
                # variable, i.e. the second-to-last column, which the slice
                # above has already stored.  Preserved as-is; confirm
                # whether the last column was intended instead.
                self.HLTTriggerMode[HLTStringName].append(int(line[-2]))
                self.HLTSeed[HLTStringName] = "NULL"
            else:
                self.HLTSeed[HLTStringName] = str(lastEntry)

    self.TriggerInfo = [self.L1TriggerMode,self.HLTTriggerMode,self.HLTSeed]
    return self.TriggerInfo
391
def AssemblePrescaleValues(self): ##Depends on output from ParseLumiPage and ParseTriggerModePage
    """Build per-LS prescale tables for every L1 and HLT trigger.

    For each key in self.L1TriggerMode (then self.HLTTriggerMode) and each
    LS n from min(self.LSByLS) to max(self.LSByLS) inclusive, the prescale
    is the entry of the trigger's mode list selected by
    self.PSColumnByLS[n].  Results go to self.L1Prescale[key][n] and
    self.HLTPrescale[key][n].

    When an LS has no entry in self.PSColumnByLS, or the column index is
    outside the trigger's mode list, the key is appended to
    self.MissingPrescale (once per run of consecutive failures on the same
    key) and a message is printed for every such LS >= 2.

    Returns:
        self.PrescaleValues = [self.L1Prescale, self.HLTPrescale,
        self.MissingPrescale].
    """
    def FillPrescales(TriggerMode, Prescale, MissingName):
        # Shared by the L1 and HLT passes; returns the last key that was
        # reported missing so the HLT pass continues from the L1 pass.
        for key in TriggerMode:
            Prescale[key] = {}
            for n in range(min(self.LSByLS),max(self.LSByLS)+1): #"range()" excludes the last element
                try:
                    Prescale[key][n] = TriggerMode[key][self.PSColumnByLS[n]]
                except (KeyError, IndexError):
                    # KeyError: LS absent from PSColumnByLS.
                    # IndexError: column index beyond the mode list.
                    if not key == MissingName:
                        self.MissingPrescale.append(key)
                    MissingName = key
                    if not n < 2:
                        print("LS "+str(n)+" of key "+str(key)+" is missing from the LumiSections page")
        return MissingName

    MissingName = FillPrescales(self.L1TriggerMode, self.L1Prescale, "Nemo")
    FillPrescales(self.HLTTriggerMode, self.HLTPrescale, MissingName)

    self.PrescaleValues = [self.L1Prescale,self.HLTPrescale,self.MissingPrescale]
    return self.PrescaleValues
420
def ComputeTotalPrescales(self,StartLS,EndLS):
    """Summarise L1*HLT prescales of every HLT path over LS StartLS..EndLS.

    Args:
        StartLS: first LS of the range (inclusive).
        EndLS: last LS of the range (inclusive).

    For each key in self.HLTTriggerMode:

    * If self.L1Prescale has no entry for the path's seed at StartLS, the
      path is flagged L1_zero and nothing else is computed for it.
    * Otherwise the reciprocals of (L1 prescale * HLT prescale) are summed
      over the range, and n1 counts the LS whose prescale column equals
      the column in use at StartLS.  A prescale below 0.1 at any LS sets
      L1_zero / HLT_zero and stops further processing of the path, leaving
      IdealPrescale[key] as the raw sum.
    * For the remaining paths IdealPrescale is (number of LS) / sum, n2 is
      the number of LS outside the initial column, L1/H1 and L2/H2 are the
      prescales at StartLS and EndLS, and IdealHLTPrescale is
      (n1/L1 + n2/L2) / (n1/(L1*H1) + n2/(L2*H2)).

    Returns:
        self.TotalPSInfo = [L1_zero, HLT_zero, IdealPrescale,
        IdealHLTPrescale, n1, n2, L1, L2, H1, H2], each a dict keyed by
        HLT path name.
    """
    IdealHLTPrescale = {}
    IdealPrescale = {}
    L1_zero = {}
    HLT_zero = {}
    n1 = {}
    n2 = {}
    L1 = {}
    L2 = {}
    H1 = {}
    H2 = {}
    InitialColumnIndex = self.PSColumnByLS[int(StartLS)]

    for key in self.HLTTriggerMode:
        try:
            # Probe only: does this path have a known L1 seed with a
            # prescale at StartLS?
            self.L1Prescale[self.HLTSeed[key]][StartLS]
        except KeyError:
            L1_zero[key] = True
            HLT_zero[key] = False
            continue

        IdealHLTPrescale[key] = 0.0
        IdealPrescale[key] = 0.0
        n1[key] = 0
        L1_zero[key] = False
        HLT_zero[key] = False

        for LSIterator in range(StartLS,EndLS+1): #"range" excludes the last element
            L1PS = self.L1Prescale[self.HLTSeed[key]][LSIterator]
            HLTPS = self.HLTPrescale[key][LSIterator]
            if L1PS > 0 and HLTPS > 0:
                IdealPrescale[key]+=1.0/(L1PS*HLTPS)
            else:
                IdealPrescale[key]+=1.0 ##To prevent a divide by 0 error later
            if L1PS < 0.1:
                L1_zero[key] = True
            if HLTPS < 0.1:
                HLT_zero[key] = True
            if self.PSColumnByLS[LSIterator] == InitialColumnIndex:
                n1[key]+=1

        if L1_zero[key] == True or HLT_zero[key] == True:
            continue

        IdealPrescale[key] = (EndLS + 1 - StartLS)/IdealPrescale[key]

        n2[key] = float(EndLS + 1 - StartLS - n1[key])
        L1[key] = float(self.L1Prescale[self.HLTSeed[key]][StartLS])
        L2[key] = float(self.L1Prescale[self.HLTSeed[key]][EndLS])
        H1[key] = float(self.HLTPrescale[key][StartLS])
        H2[key] = float(self.HLTPrescale[key][EndLS])

        # The products L1*H1 and L2*H2 had lost their '*' operators.
        IdealHLTPrescale[key] = ((n1[key]/L1[key])+(n2[key]/L2[key]))/((n1[key]/(L1[key]*H1[key]))+(n2[key]/(L2[key]*H2[key])))

    self.TotalPSInfo = [L1_zero,HLT_zero,IdealPrescale,IdealHLTPrescale,n1,n2,L1,L2,H1,H2]

    return self.TotalPSInfo
476
477
def CorrectForPrescaleChange(self,StartLS,EndLS):
    """Estimate the effective prescale of each HLT path from its measured prescale.

    Uses self.TotalPSInfo (as produced by ComputeTotalPrescales) and, per
    path, the entry [TriggerRate, L1Pass, PSPass, PS, Seed, StartLS, EndLS]
    of self.TriggerRates.

    Args:
        StartLS, EndLS: accepted for interface compatibility; both are
            overwritten by the values stored in self.TriggerRates[key] for
            each path processed.

    For each key in self.HLTTriggerMode not flagged L1_zero/HLT_zero:

    * If PS is strictly within 5% of IdealHLTPrescale[key], RealPrescale is
      IdealPrescale[key] and no shift is recorded.
    * Otherwise, if the start and end prescales are equal, the prescales
      one LS after EndLS and then one LS before StartLS are tried in place
      of the end/start values.  These replacements are written into the
      L1/L2/H1/H2 dicts held in self.TotalPSInfo.
    * xLS[key] is the number of LS moved from the first prescale set to
      the second that reproduces PS, clamped to [-1, 1].  It is 0 when H1
      still equals H2.
    * RealPrescale[key] = (n1 + n2) / ((n1 - x)/(H1*L1) + (n2 + x)/(H2*L2)).

    Returns:
        self.CorrectedPSInfo = [RealPrescale, xLS, L1, L2, H1, H2].
    """
    [L1_zero,HLT_zero,IdealPrescale,IdealHLTPrescale,n1,n2,L1,L2,H1,H2] = self.TotalPSInfo
    xLS = {}
    RealPrescale = {}

    for key in self.HLTTriggerMode:
        if L1_zero[key] == True or HLT_zero[key] == True:
            continue
        [TriggerRate,L1Pass,PSPass,PS,Seed,StartLS,EndLS] = self.TriggerRates[key]
        if PS > 0.95 * IdealHLTPrescale[key] and PS < 1.05 * IdealHLTPrescale[key]:
            RealPrescale[key] = IdealPrescale[key]
            continue

        if H1[key] == H2[key] and L1[key] == L2[key] and not EndLS > max(self.LSByLS) - 1: ##Look for prescale change into the next LS
            H2[key] = float(self.HLTPrescale[key][EndLS+1])
            L2[key] = float(self.L1Prescale[self.HLTSeed[key]][EndLS+1])
        if H1[key] == H2[key] and L1[key] == L2[key] and not StartLS < 3:
            H1[key] = float(self.HLTPrescale[key][StartLS-1])
            L1[key] = float(self.L1Prescale[self.HLTSeed[key]][StartLS-1])
        if H1[key] == H2[key]:
            xLS[key] = 0
        else:
            # Same expression as the original one-liner, with the '*'
            # operators it had lost restored:
            #   x = (-r*A*B + B*A) / (r*A*(H1*L1 - H2*L2) + B*(L2 - L1))
            Ratio = PS/IdealHLTPrescale[key]
            WeightL1 = L2[key]*n1[key]+L1[key]*n2[key]
            WeightTotal = H2[key]*L2[key]*n1[key]+H1[key]*L1[key]*n2[key]
            xLS[key] = ((-(Ratio)*(WeightL1)*(WeightTotal))+((WeightTotal)*(WeightL1)))/(((Ratio)*(WeightL1)*(H1[key]*L1[key]-H2[key]*L2[key]))+((WeightTotal)*(L2[key]-L1[key])))

        if xLS[key] > 1:
            xLS[key] = 1
        if xLS[key] < -1:
            xLS[key] = -1
        RealPrescale[key] = (n1[key] + n2[key])/(((n1[key] - xLS[key])/(H1[key]*L1[key]))+(n2[key]+xLS[key])/(H2[key]*L2[key]))

    self.CorrectedPSInfo = [RealPrescale,xLS,L1,L2,H1,H2]

    return self.CorrectedPSInfo
511
def Save(self, fileName):
    """Pickle this object to fileName.

    The parent directory of fileName is created when it does not exist.

    Args:
        fileName: path of the pickle file to write.
    """
    directory = os.path.dirname(fileName)
    # dirname is '' for a bare file name; os.makedirs('') raises, so only
    # create a directory when there is one to create.
    if directory and not os.path.exists(directory):
        os.makedirs(directory)
    # Binary mode keeps the pickle byte-exact on every platform, and the
    # with-block closes (and flushes) the file even if dump() fails.
    with open(fileName, 'wb') as output:
        pickle.dump(self, output)
517
def Load(self, fileName):
    """Restore this object's attributes from a pickle file.

    The original body only rebound the local name ``self``, which left the
    instance untouched.  The unpickled object's attributes are now copied
    onto this instance.

    Args:
        fileName: path of the pickle file to read.

    Returns:
        The unpickled object (the original returned None).
    """
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # from a trusted source.
    with open(fileName, 'rb') as source:
        loaded = pickle.load(source)
    state = getattr(loaded, '__dict__', None)
    if state is not None:
        self.__dict__.update(state)
    return loaded