1 |
ewv |
1.6 |
#!/usr/bin/env python
|
2 |
|
|
"""
|
3 |
|
|
_BlackWhiteListParser_
|
4 |
|
|
|
5 |
|
|
Parsing for black and white lists, both SE and CE
|
6 |
|
|
|
7 |
|
|
Large parts of the July 2008 re-write come from Brian Bockelman
|
8 |
|
|
|
9 |
|
|
"""
|
10 |
|
|
|
11 |
|
|
__revision__ = "$Id: SiteDB.py,v 1.5 2008/07/08 22:19:00 ewv Exp $"
|
12 |
|
|
__version__ = "$Revision: 1.5 $"
|
13 |
|
|
|
14 |
|
|
|
15 |
|
|
import os
|
16 |
|
|
import sys
|
17 |
|
|
import sets
|
18 |
|
|
import time
|
19 |
|
|
import types
|
20 |
|
|
import fnmatch
|
21 |
|
|
|
22 |
gutsche |
1.1 |
from crab_logger import Logger
|
23 |
|
|
from crab_exceptions import *
|
24 |
|
|
from crab_util import *
|
25 |
|
|
import common
|
26 |
ewv |
1.6 |
from ProdCommon.SiteDB.SiteDB import SiteDBJSON
|
27 |
|
|
|
28 |
|
|
class BlackWhiteListParser(object):
    """
    A class which applies blacklist and whitelist; designed to allow the user
    to filter out sites.  Allows users to specify only the CMS name from SiteDB
    (and simple wildcards), but internally filters only on the CE/SE name.

    This base class does not set `self.mapper`.  `expandList` calls it with
    one user-supplied name and expects back a sequence of expanded names (or
    something falsy when the name is unknown), so a subclass has to assign it
    before calling `configure`.
    """

    def __init__(self, cfg_params):
        """
        Set the default kind ('se') and create the SiteDBJSON instance.

        @param cfg_params: Not used by the base class itself; accepted so that
            subclasses can forward their constructor argument unchanged.
        """
        self.kind = 'se'
        self.siteDBAPI = SiteDBJSON()

    def configure(self, cfg_params):
        """
        Load up the black and white list from the configuration parameters
           * EDG.<kind>_black_list
           * EDG.<kind>_white_list
        (where <kind> is self.kind) and expand things that SiteDB knows the
        CMS names for.

        Afterwards self.blacklist and self.whitelist are sets of expressions;
        a missing configuration key yields an empty set.

        @param cfg_params: Mapping of configuration keys to comma-separated
            strings.
        """
        self.blacklist = self._loadList(cfg_params, 'black')
        self.whitelist = self._loadList(cfg_params, 'white')

    def _loadList(self, cfg_params, colour):
        """
        Read and expand one of the two user lists.

        @param cfg_params: Mapping of configuration keys to values
        @param colour: Either 'black' or 'white'
        @returns: A set with the expanded entries (empty if the key is absent)
        """
        key = 'EDG.%s_%s_list' % (self.kind, colour)
        if key not in cfg_params:
            return set()
        expanded = self.expandList(cfg_params[key])
        # Renders as 'Converted <kind> blacklist: ...' / '... whitelist: ...'
        common.logger.debug(5, 'Converted %s %slist: %s'
                            % (self.kind, colour, ', '.join(expanded)))
        return set(expanded)

    def expandList(self, userInput):
        """
        Split a comma-separated user string and expand every entry through
        self.mapper.

        Entries the mapper returns nothing for are kept verbatim so they can
        still be used as wildcard/substring/regex expressions later on.

        @param userInput: Comma-separated string of names or expressions
        @returns: A list with the expanded entries
        """
        expandedList = []
        for item in userInput.split(','):
            item = item.strip()
            if not item:
                # An empty entry (trailing comma, empty value) would later
                # match every site as a substring, so it is dropped here.
                continue
            expandedItem = self.mapper(item)
            if expandedItem:
                expandedList.extend(expandedItem)
            else:
                expandedList.append(item)
        return expandedList

    def _blockLabel(self, fileblocks):
        """
        Turn the `fileblocks` argument into text for the debug messages.

        A plain string is used as it is (joining it would put ', ' between
        every character); a container of block names is joined with ', '.

        @param fileblocks: A block name or a list/tuple/set of block names
        @returns: A printable string
        """
        if isinstance(fileblocks, (list, tuple, set, frozenset)):
            return ', '.join(fileblocks)
        return '%s' % (fileblocks,)

    def checkBlackList(self, Sites, fileblocks=''):
        """
        Select sites that are not excluded by the user (via blacklist)

        The sites returned are the input sites minus the contents of the
        self.blacklist

        @param Sites: The sites which will be filtered
        @keyword fileblocks: The block this is used for; only used in a pretty
            debug message.
        @returns: The input sites minus the blacklist, as a list in no
            particular order.
        """
        Sites = set(Sites)
        blacklist = set(self.match_list(Sites, self.blacklist))
        goodSites = list(Sites.difference(blacklist))
        if fileblocks:
            label = self._blockLabel(fileblocks)
            if not goodSites:
                msg = "No sites hosting the block %s after blackList" % (label,)
                common.logger.debug(5, msg)
                common.logger.debug(5, "Proceeding without this block.\n")
            else:
                common.logger.debug(5, "Selected sites for block %s via blacklist "
                                    "are %s.\n" % (label, ', '.join(goodSites)))
        return goodSites

    def checkWhiteList(self, Sites, fileblocks=''):
        """
        Select sites that are defined by the user (via white list).

        The sites returned are the intersection of the input sites and the
        contents of self.whitelist

        @param Sites: The sites which will be filtered
        @keyword fileblocks: The block this is applied for; only used for a
            pretty debug message
        @returns: The intersection of the input Sites and self.whitelist as a
            list in no particular order.  With an empty whitelist the input
            object is handed back untouched.
        """
        if not self.whitelist:
            return Sites
        whitelist = self.match_list(Sites, self.whitelist)
        goodSites = list(set(Sites).intersection(whitelist))
        if fileblocks:
            label = self._blockLabel(fileblocks)
            if not goodSites:
                msg = "No sites hosting the block %s after whiteList" % (label,)
                common.logger.debug(5, msg)
                common.logger.debug(5, "Proceeding without this block.\n")
            else:
                common.logger.debug(5, "Selected sites for block %s via whitelist "
                                    " are %s.\n" % (label, ', '.join(goodSites)))
        return goodSites

    def cleanForBlackWhiteList(self, destinations, list=False):
        """
        Clean for black/white lists using parser.

        Take the input list and apply the blacklist, then the whitelist that
        the user specified.

        @param destinations: A list of all the input sites
        @keyword list: Set to True or the string 'list' to return a list
            object.  Set to False or the string '' to return a string object.
            The default is False.
        @returns: The list of all input sites, first filtered by the blacklist,
            then filtered by the whitelist.  If list=True, returns a list; if
            list=False, return a comma-separated string.
        """
        # NOTE: the keyword name shadows the builtin `list`; it is part of the
        # public signature and therefore kept.
        goodSites = self.checkWhiteList(self.checkBlackList(destinations))
        if list:
            return goodSites
        return ','.join(goodSites)

    def match_list(self, names, match_list):
        """
        Filter a list of names against a list of expressions.

        This uses the `match` function to do the heavy lifting

        @param names: An iterable of input names to filter
        @param match_list: Either a comma-separated string of expressions or
            an iterable of expression strings
        @returns: A list with every entry of `names` matching one of the
            expressions.  An expression that matches nothing is itself added
            to the result.  Empty expressions are ignored.
        @rtype: list
        """
        results = []
        if isinstance(match_list, str):
            match_list = match_list.split(',')

        for expr in match_list:
            expr = expr.strip()
            if not expr:
                # '' is a substring of every name; never let it select all.
                continue
            matching = self.match(names, expr)
            if matching:
                results.extend(matching)
            else:
                results.append(expr)
        return results

    def match(self, names, expr):
        """
        Return all the entries in `names` which match `expr`

        First, try to apply wildcard-based filters, then look at substrings,
        then interpret expr as a regex.  A name is reported once even if more
        than one of the three strategies selects it.

        @param names: An input list of strings to match
        @param expr: A string expression to use for matching
        @returns: All entries in the list `names` which match `expr`, in
            first-hit order.  If `expr` is not a valid regular expression the
            regex step is skipped.
        """
        # Imported here so the method does not depend on `re` leaking in
        # through one of the module's star imports.
        import re

        results = []
        seen = set()

        def collect(candidates):
            for name in candidates:
                if name not in seen:
                    seen.add(name)
                    results.append(name)

        collect(fnmatch.filter(names, expr))
        collect([i for i in names if i.find(expr) >= 0])
        try:
            my_re = re.compile(expr)
        except (re.error, OverflowError):
            # Not every wildcard expression is a valid regex (for example
            # '*.edu'); the first two strategies are all that applies then.
            return results
        collect([i for i in names if my_re.search(i)])
        return results
|
210 |
|
|
|
211 |
|
|
|
212 |
|
|
|
213 |
|
|
class SEBlackWhiteListParser(BlackWhiteListParser):
    """
    Black/white list parser for SEs: filters the possible list of SEs using
    the user's input.  All of the filtering logic lives in
    BlackWhiteListParser; see its documentation.
    """

    def __init__(self, cfg_params):
        """
        Set up the SE flavour of the parser and load the user's lists.

        @param cfg_params: Configuration parameters, handed on to the base
            class constructor and to configure()
        """
        super(SEBlackWhiteListParser, self).__init__(cfg_params)
        # Entries are expanded through the CMSNametoSE lookup of the SiteDB
        # object created by the base class.
        self.mapper = self.siteDBAPI.CMSNametoSE
        self.kind = 'se'
        # Must come last: configure() relies on both kind and mapper.
        self.configure(cfg_params)
|
224 |
|
|
|
225 |
|
|
|
226 |
ewv |
1.5 |
|
227 |
|
|
class CEBlackWhiteListParser(BlackWhiteListParser):
    """
    Black/white list parser for CEs: filters the possible list of CEs using
    the user's input.  All of the filtering logic lives in
    BlackWhiteListParser; see its documentation.
    """

    def __init__(self, cfg_params):
        """
        Set up the CE flavour of the parser and load the user's lists.

        @param cfg_params: Configuration parameters, handed on to the base
            class constructor and to configure()
        """
        super(CEBlackWhiteListParser, self).__init__(cfg_params)
        # Entries are expanded through the CMSNametoCE lookup of the SiteDB
        # object created by the base class.
        self.mapper = self.siteDBAPI.CMSNametoCE
        # Overrides the 'se' default set by the base constructor.
        self.kind = 'ce'
        # Must come last: configure() relies on both kind and mapper.
        self.configure(cfg_params)
|