ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PhEDExDatasvcInfo.py
Revision: 1.42
Committed: Mon Aug 29 09:48:03 2011 UTC (13 years, 8 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_7_9_pre2
Changes since 1.41: +34 -7 lines
Log Message:
changes about xrootd plugin, savannah #83930

File Contents

# User Rev Content
1 spiga 1.3 from Actor import *
2 afanfani 1.1 import urllib
3     from xml.dom.minidom import parse
4     from crab_exceptions import *
5 spiga 1.3 from WorkSpace import *
6     from urlparse import urlparse
7     from LFNBaseName import *
8 fanzago 1.36 from crab_util import getUserName
9 afanfani 1.1
class PhEDExDatasvcInfo:
    """
    Work out where a task stages out its output: the LFN, the PFN (endpoint)
    and the storage element information.

    When the configured storage element name contains 'T1_', 'T2_' or 'T3_'
    the PFN is resolved through the PhEDEx Data Service (lfn2pfn call);
    otherwise it is assembled from the user-provided storage settings.
    """

    def __init__(self, cfg_params=None, config=None):
        """
        input: cfg_params (mapping with crab.cfg-style 'SECTION.key' keys)
               config (plain mapping; used instead of cfg_params when given)
        raises: CrabException (from checkCfgConfig) on incomplete settings
        """
        ## PhEDEx Data Service URL
        self.datasvc_url = "https://cmsweb.cern.ch/phedex/datasvc/xml/prod"

        ## Links quoted in the error messages
        self.FacOps_savannah = 'https://savannah.cern.ch/support/?func=additem&group=cmscompinfrasup'
        self.stage_out_faq = 'https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabHowTo#Stageout_and_publication'
        self.dataPub_faq = 'https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabForPublication'

        self.usePhedex = True
        self.sched = common.scheduler.name().upper()

        if config is not None:
            self.checkConfig(config)
        else:
            # With no settings at all behave like an empty crab.cfg, so the
            # caller gets the "specify the storage_element" CrabException
            # instead of an AttributeError on None.
            if cfg_params is None:
                cfg_params = {}
            self.checkCfgConfig(cfg_params)

        # checkCfgConfig may have replaced srm_version ('direct'/'stageout')
        self.protocol = self.srm_version

    @staticmethod
    def _withTrailingSlash(path):
        """
        input: path string
        returns: path, guaranteed to end with '/'
        """
        if not path.endswith('/'):
            path = path + '/'
        return path

    @staticmethod
    def _pathAfterHost(endpoint):
        """
        input: endpoint of the form <scheme>://<host[:port]><rest>
        returns: <rest>, i.e. everything following the host[:port] part
        raises: IndexError when endpoint holds no '://' separator
        """
        # urlparse only fills the host field reliably for schemes it knows,
        # so the endpoint is re-labelled as http before parsing.
        url = 'http://' + endpoint.split('://')[1]
        scheme, host, path, params, query, fragment = urlparse(url)
        # Split once only: the host name may show up again inside the path
        # (e.g. /pnfs/<domain>/...) and must not cut the result short.
        return endpoint.split(host, 1)[1]

    @staticmethod
    def _firstAttribute(dom, tag, attribute):
        """
        input: DOM object, element tag name, attribute name
        returns: first non-empty value of attribute among the tag elements
                 found under the first <phedex> element; [] when the DOM has
                 no <phedex> element; None when no element carries a value
        """
        roots = dom.getElementsByTagName('phedex')
        if not roots:
            return []
        for element in roots[0].getElementsByTagName(tag):
            value = element.getAttribute(attribute)
            if value:
                return value
        return None

    def _queryDatasvc(self, params, datasvc_baseUrl, fullurl):
        """
        Call the Data Service and make sure a usable answer came back.

        input: params, datasvc_baseUrl (as for domPhedex),
               fullurl (readable form of the query, quoted in error messages)
        returns: DOM object with the answer
        raises: CrabException when nothing could be parsed or the answer
                carries an <error> element
        """
        dom = self.domPhedex(params, datasvc_baseUrl)
        if not dom:
            msg = "Unable to get info from %s"%fullurl
            raise CrabException(msg)

        errormsg = self.parse_error(dom)
        if errormsg:
            msg = "Error extracting info from %s due to: %s"%(fullurl,errormsg)
            raise CrabException(msg)
        return dom

    def checkConfig(self, config):
        """
        Read the settings from a plain mapping.

        Only srm_version, node and lfn are set here; the publication and
        user-storage attributes read by getLFN are not.
        """
        self.srm_version = config.get("srm_version", 'srmv2')
        self.node = config.get('storage_element', None)
        self.lfn = '/store/'

    def checkCfgConfig(self, cfg_params):
        """
        Read the settings from crab.cfg-style parameters and validate them.

        input: mapping with 'USER.*' / 'CMSSW.*' keys
        raises: CrabException when storage_element is missing, or when the
                storage element is not a T1_/T2_/T3_ name and user_remote_dir
                or storage_path is missing (the working directory is removed
                before raising in that case)
        """
        self.datasvc_url = cfg_params.get("USER.datasvc_url", self.datasvc_url)
        self.srm_version = cfg_params.get("USER.srm_version", 'srmv2')
        self.node = cfg_params.get('USER.storage_element', None)

        self.publish_data = cfg_params.get("USER.publish_data", 0)
        self.usenamespace = cfg_params.get("USER.usenamespace", 0)
        self.user_remote_dir = cfg_params.get("USER.user_remote_dir", '')
        if self.user_remote_dir:
            self.user_remote_dir = self._withTrailingSlash(self.user_remote_dir)

        self.datasetpath = cfg_params.get("CMSSW.datasetpath")
        self.publish_data_name = cfg_params.get('USER.publish_data_name', '')

        self.user_port = cfg_params.get("USER.storage_port", '8443')
        self.user_se_path = cfg_params.get("USER.storage_path", '')
        if self.user_se_path:
            self.user_se_path = self._withTrailingSlash(self.user_se_path)

        # check if using "private" Storage
        if not self.node:
            msg = 'Please specify the storage_element name in your crab.cfg section [USER].\n'
            msg += '\tFor further information please visit : %s'%self.stage_out_faq
            raise CrabException(msg)
        # A name holding none of the tier tags (anywhere in the string) is
        # handled without PhEDEx.
        if 'T1_' not in self.node and 'T2_' not in self.node and 'T3_' not in self.node:
            self.usePhedex = False

        if not self.usePhedex and (self.user_remote_dir == '' or self.user_se_path == ''):
            # bug 73010: the task cannot work, so clean up its working directory
            msg = 'Error: you are asking to stage out without using CMS Storage Name convention. In this case you \n'
            msg += ' must specify both user_remote_dir and storage_path in the crab.cfg section [USER].\n'
            msg += ' For further information please visit : \n\t%s'%self.stage_out_faq
            # Result unused; the call is kept because its side effects are
            # not visible from this file.
            common._db.getTask()
            add = '\n\n'
            import shutil
            try:
                add += ' Task not correctly created: removing the working_dir ' + common.work_space._top_dir + ' \n'
                shutil.rmtree(common.work_space._top_dir)
            except OSError:
                add += ' Warning: problems removing the working_dir ' + common.work_space._top_dir + ' \n'
                add += ' Please remove it by hand'
            msg += add
            raise CrabException(msg)

        self.forced_path = '/store/user/'
        # NOTE(review): self.SE is only defined for LSF, PBS and CAF.
        # splitEndpoint reads it whenever the protocol is 'direct' or
        # 'stageout', so a user-set USER.srm_version of that kind with another
        # scheduler fails there -- confirm whether that combination is valid.
        if self.sched in ['LSF','PBS']:
            # overrides any USER.srm_version read above
            self.srm_version = 'direct'
            self.SE = {'LSF':'', 'PBS':''}

        if self.sched == 'CAF':
            # xrootd plugin (savannah #83930): the first part of the LFN is
            # configurable through USER.caf_lfn
            self.forced_path = cfg_params.get("USER.caf_lfn", '/store/caf/user')
            print("%s %s" % ("--->>> FORCING THE FIRST PART OF LFN WITH ", self.forced_path))
            self.SE = {'CAF':'caf.cern.ch'}
            self.srm_version = 'stageout'
            print("--->>> query with 'stageout' ")

        if not self.usePhedex:
            self.forced_path = self.user_remote_dir
        return

    def getEndpoint(self):
        '''
        Return full SE endpoint and related infos

        returns: (endpoint, lfn, SE, SE_PATH, User); also stores the computed
                 LFN in self.lfn
        '''
        self.lfn = self.getLFN()

        # extract the PFN for the given node,LFN,protocol
        endpoint = self._withTrailingSlash(self.getStageoutPFN())

        # extract SE name and SE_PATH (needed for publication)
        SE, SE_PATH, User = self.splitEndpoint(endpoint)

        return endpoint, self.lfn, SE, SE_PATH, User

    def splitEndpoint(self, endpoint):
        '''
        Return relevant infos from endpoint

        input: endpoint (PFN as returned by getStageoutPFN)
        returns: (SE, SE_PATH, USER)
        '''
        USER = getUserName()
        if self.usePhedex:
            if self.protocol in ('direct', 'stageout'):
                SE = self.SE[self.sched]
                SE_PATH = endpoint
            else:
                # Parse the endpoint before querying the Data Service, so a
                # malformed endpoint fails without a network round trip.
                SE_PATH = self._pathAfterHost(endpoint)
                SE = self.getAuthoritativeSE()
        else:
            SE = self.node
            SE_PATH = self.user_se_path + self.user_remote_dir
        if 'group' in self.lfn:
            # For a .../group/<name>/... LFN the path component following
            # 'group' replaces the user name.
            try:
                USER = self.lfn.split('group')[1].split('/')[1]
            except IndexError:
                # nothing after 'group': keep the name from getUserName()
                pass
        return SE, SE_PATH, USER

    def getLFN(self):
        """
        define the LFN composing the needed pieces

        returns: LFN string
        raises: CrabException when publication is requested without
                publish_data_name
        """
        if not self.usePhedex and (int(self.publish_data) == 0 and int(self.usenamespace) == 0):
            # TODO: if the storage element is a T2 (siteDB query) the user
            # LFN should be matched against LFNBaseName and rejected when it
            # does not comply.
            return self.user_remote_dir
        if self.publish_data_name == '' and int(self.publish_data) == 1:
            msg = "Error. The [USER] section does not have 'publish_data_name'\n"
            msg += '\tFor further information please visit : \n\t%s'%self.dataPub_faq
            raise CrabException(msg)
        if self.publish_data_name == '' and int(self.usenamespace) == 1:
            self.publish_data_name = "DefaultDataset"
        if int(self.publish_data) == 1:
            primaryDataset = self.computePrimaryDataset()
            # publish=True is passed for publication so that the lfn length
            # can be checked
            lfn = LFNBase(self.forced_path, primaryDataset, self.publish_data_name, publish=True) + '/${PSETHASH}/'
        elif int(self.usenamespace) == 1:
            primaryDataset = self.computePrimaryDataset()
            lfn = LFNBase(self.forced_path, primaryDataset, self.publish_data_name) + '/${PSETHASH}/'
        else:
            lfn = LFNBase(self.forced_path, self.user_remote_dir)
        return self._withTrailingSlash(lfn)

    def computePrimaryDataset(self):
        """
        compute the last part for the LFN in case of publication

        returns: second '/'-separated field of datasetpath, or
                 publish_data_name when datasetpath is unset or 'none'
                 (any letter case)
        """
        if self.datasetpath is not None and self.datasetpath.upper() != 'NONE':
            return self.datasetpath.split("/")[1]
        return self.publish_data_name

    def domPhedex(self, params, datasvc_baseUrl):
        """
        PhEDEx Data Service call

        input: params,datasvc_baseUrl
        returns: DOM object with the content of the PhEDEx Data Service call,
                 or None when the answer could not be parsed
        raises: CrabException when the service cannot be reached (IOError)
        """
        # the encoded params are handed to urlopen as its data argument
        params = urllib.urlencode(params)
        try:
            response = urllib.urlopen(datasvc_baseUrl, params)
            try:
                return parse(response)
            finally:
                # the handle was previously left open
                response.close()
        except IOError:
            msg = "Unable to access PhEDEx Data Service at %s"%datasvc_baseUrl
            raise CrabException(msg)
        except Exception:
            # e.g. malformed XML: callers treat None as "no info"
            return None

    def parse_error(self, urlresults):
        """
        look for errors in the DOM object returned by PhEDEx Data Service call

        returns: None when there is no <error> element, otherwise the text of
                 the last <error> element (never an empty string, so callers
                 testing the result always notice the error)
        """
        errormsg = None
        for error in urlresults.getElementsByTagName('error'):
            pieces = [node.data for node in error.childNodes if hasattr(node, 'data')]
            errormsg = ''.join(pieces) or 'unspecified error (empty <error> element)'
        return errormsg

    def parse_lfn2pfn(self, urlresults):
        """
        Parse the content of the result of lfn2pfn PhEDEx Data Service call

        input: DOM object with the content of the lfn2pfn call
        returns: PFN ([] when there is no <phedex> element, None when no
                 <mapping> carries a pfn)
        """
        return self._firstAttribute(urlresults, 'mapping', 'pfn')

    def getStageoutPFN(self):
        """
        input: LFN,node name,protocol
        returns: PFN, always ending with '/'
        raises: CrabException when the Data Service gives no usable answer
        """
        if self.usePhedex:
            params = {'node' : self.node , 'lfn': self.lfn , 'protocol': self.protocol}
            datasvc_lfn2pfn = "%s/lfn2pfn"%self.datasvc_url
            # readable form of the query, used in messages only
            fullurl = "%s/lfn2pfn?node=%s&lfn=%s&protocol=%s"%(self.datasvc_url,self.node,self.lfn,self.protocol)
            print("%s %s" % ("--->>> fullurl = ", fullurl))
            domlfn2pfn = self._queryDatasvc(params, datasvc_lfn2pfn, fullurl)

            stageoutpfn = self.parse_lfn2pfn(domlfn2pfn)
            if not stageoutpfn:
                msg ='Unable to get stageout path from TFC at Site %s \n'%self.node
                msg+=' Please alert the CompInfraSup group through their savannah %s \n'%self.FacOps_savannah
                msg+=' reporting: \n'
                msg+=' Summary: Unable to get user stageout from TFC at Site %s \n'%self.node
                msg+=' OriginalSubmission: stageout path is not retrieved from %s \n'%fullurl
                raise CrabException(msg)
        else:
            if self.sched in ['CAF','LSF','PBS']:
                stageoutpfn = self.user_se_path + self.lfn
            else:
                stageoutpfn = 'srm://' + self.node + ':' + self.user_port + self.user_se_path + self.lfn

        return self._withTrailingSlash(stageoutpfn)

    def getAuthoritativeSE(self):
        """
        input: node name
        returns: AuthoritativeSE ([] when there is no <phedex> element, None
                 when no <node> carries an se)
        raises: CrabException when the Data Service gives no usable answer
        """
        params = {'node' : self.node }
        datasvc_nodes = "%s/nodes"%self.datasvc_url
        fullurl = "%s/nodes/?node=%s"%(self.datasvc_url,self.node)
        domnodes = self._queryDatasvc(params, datasvc_nodes, fullurl)
        return self._firstAttribute(domnodes, 'node', 'se')
322 afanfani 1.6
323    
if __name__ == '__main__':
    # Sort of unit testing to check Phedex API for whatever site and/or lfn.
    # Usage:
    #   python PhEDExDatasvcInfo.py --node T2_IT_Bari --lfn /store/maremma
    import getopt
    import sys
    from crab_util import *
    import common

    # The class constructor asks common.scheduler for its name, so a
    # scheduler instance has to be in place first.
    scheduler_name = 'SchedulerGlite'
    scheduler_class = importName(scheduler_name, scheduler_name)
    common.scheduler = scheduler_class()

    # Defaults, replaced by the matching command line options when given.
    settings = {'--lfn': "/store/user/", '--node': 'T2_IT_Bari'}
    try:
        parsed, leftover = getopt.getopt(sys.argv[1:], "", ['node=','lfn='])
    except getopt.GetoptError:
        # sys.exc_info keeps this clause valid on every Python version
        print(str(sys.exc_info()[1]))
        sys.exit(1)
    for flag, value in parsed:
        if flag in settings:
            settings[flag] = value

    checker = PhEDExDatasvcInfo({ 'USER.storage_element': settings['--node'] })
    checker.lfn = settings['--lfn']
    print(checker.getStageoutPFN())
356