ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PubDBInfo.py
Revision: 1.1
Committed: Tue Aug 23 10:57:51 2005 UTC (19 years, 8 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
Log Message:
add files for pre2

File Contents

# User Rev Content
1 slacapra 1.1 #!/usr/bin/env python
2     import sys, os, string, re
3     import urllib, urllister
4     import urllib2
5     from UnserializePHP import *
6     from orcarcBuilderNew import *
7    
class PubDBInfoError(Exception):
    """Error signalling a failure to access PubDB for the given collections.

    Deriving from Exception makes the class a proper exception type, so
    ``raise PubDBInfoError(...)`` and ``except PubDBInfoError`` work on
    every Python version.

    Constructing the error prints a diagnostic, exactly as before; the
    same text (without the surrounding newlines) is also available through
    ``str(error)``.

    Parameters:
        Collections: string naming the collection(s) that were requested.
    """

    def __init__(self, Collections):
        Exception.__init__(
            self, 'ERROR accessing PubDB for Collections: ' + Collections)
        # Kept so handlers can inspect which collections failed.
        self.Collections = Collections
        print('\nERROR accessing PubDB for Collections: ' + Collections + '\n')
class PubDBInfoNoCollectionError(Exception):
    """Error signalling that PubDB returned no collection for the request.

    Deriving from Exception makes the class a proper exception type, so it
    can be raised and caught on every Python version.

    Constructing the error prints a diagnostic, exactly as before; the
    same text (without the surrounding newlines) is also available through
    ``str(error)``.

    Parameters:
        Collections: string naming the collection(s) that were requested.
    """

    def __init__(self, Collections):
        Exception.__init__(
            self, 'ERROR No Collections found in PubDB : ' + Collections)
        # Kept so handlers can inspect which collections were missing.
        self.Collections = Collections
        print('\nERROR No Collections found in PubDB : ' + Collections + '\n')
class NoPHPError(Exception):
    """Error signalling that the PHP page at ``url`` is not the updated version.

    Deriving from Exception makes the class a proper exception type, so it
    can be raised and caught on every Python version.

    Constructing the error prints a diagnostic, exactly as before.

    Parameters:
        url: address of the PHP page; converted with ``str()`` for display.
    """

    def __init__(self, url):
        Exception.__init__(
            self, 'ERROR accessing PHP: ' + str(url) + " isn't updated version")
        # Kept so handlers can inspect the offending address.
        self.url = url
        # One pre-built string reproduces the output of the former
        # multi-argument print statement (items were space-separated, hence
        # the double space after the colon) under both Python 2 and 3.
        print('ERROR accessing PHP:  ' + str(url) + " isn't updated version \n")
class PubDBInfoResult:
    """Simple holder that keeps whatever it is given in ``contents``."""

    def __init__(self, contents):
        # Stored as-is; no copy or validation is performed.
        self.contents = contents
25    
26     ################################################################################
27     # Class to connect to PubDB interface for the analysis and download the data in one shot using the serialized PHP data.
28     ################################################################################
class PubDBInfo:
    """Client for the PubDB analysis interface.

    Downloads, in a single request, the serialized-PHP description of the
    catalogues attached to a set of collections, turns it into catalogue
    entries and keeps one catalogue per (collection, FileType) pair.

    Parameters:
        pubdburl: base URL of the PubDB site; the PHP page name is appended
            to it verbatim.
        Collections: string placed after ``?collid=`` in the request.
    """

    # Fields packed inside each RunRange string; compiled once for the class
    # instead of once per loop iteration.
    _RE_TOTAL_EVENTS = re.compile(r'TotalEvents=(\d*)')
    _RE_FIRST_RUN = re.compile(r'FirstRun=(\d*)')
    _RE_LAST_RUN = re.compile(r'LastRun=(\d*)')

    def __init__(self, pubdburl, Collections):
        self.Collections = Collections
        self.PubDBurl_ = pubdburl
        self.PubDBInfophp_ = 'pubdb-get-analysisinfo.php'
        # Access protocols in decreasing order of preference; consulted by
        # SelectBestCatalog.
        self.protocolPrio_ = ['http', 'rfio', 'mysql', 'gridftp']

    ##########################################################################
    def GetPubDBInfo(self):
        """
        Get all the catalogue-related info from PubDB and select the best
        catalogues when multiple choices are possible.

        Returns the list produced by SelectBestPubDBInfo.
        """
        ### extract catalogue-related info from PubDB
        cataloguecoll_map = self.ExtractPubDBInfo()
        ### select the best catalogues
        return self.SelectBestPubDBInfo(cataloguecoll_map)

    #########################################################################
    def ExtractPubDBInfo(self):
        """
        Extract all the information from the PubDB analysis interface.

        Returns a dictionary mapping each CollectionId to the list of
        catalogue entries (built with catalogEntryNew) found for it.

        Raises:
            PubDBInfoError: the URL could not be opened (IOError).
            PubDBInfoNoCollectionError: the reply starts with '<'.
            PHPUnserializeError: an IOError occurred while decoding or
                walking the reply.
        """
        url = (self.PubDBurl_ + self.PubDBInfophp_
               + '?collid=' + self.Collections)
        try:
            f = urllib.urlopen(url)
        except IOError:
            raise PubDBInfoError(self.Collections)

        # Always release the connection, even if reading fails.
        try:
            data = f.read()
        finally:
            f.close()

        # A reply starting with '<' is treated as "no collection found".
        if len(data) > 0 and data[0] == '<':
            raise PubDBInfoNoCollectionError(self.Collections)

        try:
            catalogues = PHPUnserialize().unserialize(data)
        except IOError:
            raise PHPUnserializeError(data)

        # NOTE(review): the loop nesting below was reconstructed from a
        # listing whose indentation had been stripped; confirm that one
        # catalogue entry per CatalogueContents item is what is intended.
        collmap = {}
        try:
            for k in catalogues.keys():
                CollId = catalogues[k]['CollectionId']
                colllist = []

                cat = catalogues[k]['Catalogue']
                for kcat in cat.keys():
                    entry = cat[kcat]
                    ContactString = entry['ContactString']
                    ContactProtocol = entry['ContactProtocol']
                    CatalogueType = entry['CatalogueType']
                    ValidationStatus = entry['ValidationStatus']

                    ce = entry['CEs']
                    CElist = [ce[kce] for kce in ce.keys()]

                    cc = entry['CatalogueContents']
                    for kcc in cc.keys():
                        content = cc[kcc]
                        FileType = content['FileType']
                        SE = content['SE']

                        # Reset for every content item so an empty
                        # 'Variables' dict cannot inherit the value of a
                        # previous catalogue.  When several variables are
                        # present only the last one visited is kept, as
                        # before.
                        Variables = ''
                        variables = content['Variables']
                        if variables is not None:
                            for kvar in variables.keys():
                                Variables = kvar + "=" + variables[kvar]

                        # Same reset for the run information.  When several
                        # run ranges are present the last one visited wins,
                        # as before.
                        TotalEvents = ''
                        FirstRun = ''
                        LastRun = ''
                        run = content['RunRange']
                        for krun in run.keys():
                            runinfo = run[krun]
                            TotalEvents = self._RE_TOTAL_EVENTS.search(runinfo).group(1)
                            FirstRun = self._RE_FIRST_RUN.search(runinfo).group(1)
                            LastRun = self._RE_LAST_RUN.search(runinfo).group(1)

                        ## fill a catalogue entry
                        acatalogue = catalogEntryNew(
                            FileType, ValidationStatus, ContactString,
                            ContactProtocol, CatalogueType, SE, CElist,
                            TotalEvents, FirstRun + '-' + LastRun, Variables)
                        ## list the catalogues belonging to a given collection
                        colllist.append(acatalogue)

                ## dictionary grouping catalogues by CollectionID
                collmap[CollId] = colllist

        except IOError:
            raise PHPUnserializeError(data)

        return collmap

    ########################################################################
    def SelectBestPubDBInfo(self, cataloglist):
        """
        Select the list of needed catalogues: from the catalogues referring
        to the same collection and FileType, keep the best one based on
        protocol.

        Parameters:
            cataloglist: dictionary mapping a collection id to a list of
                catalogue objects exposing ``FileType`` and
                ``ContactProtocol``.

        Returns a flat list with one catalogue per (collection, FileType).
        """
        selectcatalogues = []

        ### for each collection
        for collid in cataloglist.keys():
            ### group the catalogues by FileType in a single pass,
            ### remembering the order in which the FileTypes first appear
            filetypes = []
            ftmap = {}
            for catalog in cataloglist[collid]:
                if catalog.FileType not in ftmap:
                    filetypes.append(catalog.FileType)
                    ftmap[catalog.FileType] = []
                ftmap[catalog.FileType].append(catalog)
            ### select only one catalogue among those with the same FileType
            for ft in filetypes:
                selectcatalogues.append(self.SelectBestCatalog(ftmap[ft]))

        ### return the minimal list of needed catalogues
        return selectcatalogues

    ####################################################
    def SelectBestCatalog(self, ftcat):
        """
        From a set of catalogues with the same FileType and different
        access protocols, select one according to the protocol priority.

        Parameters:
            ftcat: list of catalogue objects exposing ``ContactProtocol``.

        Returns the chosen catalogue, or None when ``ftcat`` is empty.
        """
        if not ftcat:
            return None

        ### if just one catalogue, just return it!
        if len(ftcat) == 1:
            return ftcat[0]

        ### otherwise select the best catalogue based on protocol
        for prot in self.protocolPrio_:
            for cat in ftcat:
                if cat.ContactProtocol == prot:
                    return cat

        # None of the catalogues uses a listed protocol.  A single catalogue
        # is returned whatever its protocol, so do the same here rather than
        # handing None back to the caller.
        return ftcat[0]
192