ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PhEDExDatasvcInfo.py
Revision: 1.49
Committed: Fri Oct 12 13:36:22 2012 UTC (12 years, 6 months ago) by fanzago
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_9_1, CRAB_2_9_1_pre2, CRAB_2_9_1_pre1, CRAB_2_9_0, CRAB_2_9_0_pre2, CRAB_2_9_0_pre1, CRAB_2_8_8, CRAB_2_8_8_pre1, CRAB_2_8_7_patch3, CRAB_2_8_7_patch2, CRAB_2_8_7_patch1, CRAB_2_8_7, CRAB_2_8_7_pre2, CRAB_2_8_7_pre1, CRAB_2_8_6, CRAB_2_8_6_pre1, CRAB_2_8_5_patch3, CRAB_2_8_5_patch2, CRAB_2_8_5_patch1, CRAB_2_8_5, CRAB_2_8_5_pre5, CRAB_2_8_5_pre4, CRAB_2_8_5_pre3, CRAB_2_8_4_patch3, CRAB_2_8_5_pre2, CRAB_2_8_4_patch2, CRAB_2_8_5_pre1, CRAB_2_8_4_patch1, CRAB_2_8_4, CRAB_2_8_4_pre5, CRAB_2_8_4_pre4, CRAB_2_8_4_pre3, CRAB_2_8_4_pre2, CRAB_2_8_4_pre1, HEAD
Changes since 1.48: +4 -1 lines
Log Message:
removed the final double slash in the endpoint, savannah bug 98143

File Contents

# User Rev Content
1 spiga 1.3 from Actor import *
2 afanfani 1.1 import urllib
3     from xml.dom.minidom import parse
4     from crab_exceptions import *
5 spiga 1.3 from WorkSpace import *
6     from urlparse import urlparse
7     from LFNBaseName import *
8 fanzago 1.36 from crab_util import getUserName
9 afanfani 1.1
class PhEDExDatasvcInfo:
    """
    Compute where the job output has to be staged out: the LFN, the
    endpoint (PFN), the storage element name and the path on it.

    When the storage element is given as a T1_/T2_/T3_ site name the PFN is
    obtained from the PhEDEx Data Service (lfn2pfn call); otherwise it is
    built from the user supplied storage_path and user_remote_dir.
    """
    def __init__( self , cfg_params=None, config=None ):
        """
        input: cfg_params : dictionary-like crab.cfg content ("SECTION.key" names)
               config     : alternative dictionary-like configuration; when it
                            is given it is used and cfg_params is ignored
        """
        ## PhEDEx Data Service URL
        self.datasvc_url="https://cmsweb.cern.ch/phedex/datasvc/xml/prod"

        ## links quoted in the error messages
        self.FacOps_savannah = 'https://savannah.cern.ch/support/?func=additem&group=cmscompinfrasup'
        self.stage_out_faq='https://twiki.cern.ch/twiki/bin/view/CMSPublic/SWGuideCrabFaq#Stageout_and_publication'
        self.dataPub_faq = 'https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabForPublication'

        self.usePhedex = True
        self.sched = common.scheduler.name().upper()

        if config is not None:
            self.checkConfig(config)
        else:
            # without any parameter fall through to the "missing
            # storage_element" CrabException instead of failing on None.get
            if cfg_params is None:
                cfg_params = {}
            self.checkCfgConfig(cfg_params)

        self.protocol = self.srm_version


    def checkConfig(self,config):
        """
        Read the settings from the 'config' dictionary.

        Only srm_version, node and lfn are set here; the other attributes
        read by getLFN/getEndpoint are set by checkCfgConfig only.
        """
        self.srm_version = config.get("srm_version",'srmv2')
        self.node = config.get('storage_element',None)
        self.lfn='/store/'

    def checkCfgConfig(self,cfg_params):
        """
        Read the settings from the crab.cfg parameters and decide whether
        PhEDEx has to be used.

        raises: CrabException if storage_element is missing, or if a storage
                element which is not a T1_/T2_/T3_ site is used without both
                user_remote_dir and storage_path
        """
        self.datasvc_url = cfg_params.get("USER.datasvc_url",self.datasvc_url)
        self.srm_version = cfg_params.get("USER.srm_version",'srmv2')
        self.node = cfg_params.get('USER.storage_element',None)

        self.publish_data = cfg_params.get("USER.publish_data",0)
        self.usenamespace = cfg_params.get("USER.usenamespace",0)
        self.user_remote_dir = cfg_params.get("USER.user_remote_dir",'')
        # directories are always stored with a trailing slash
        if self.user_remote_dir and not self.user_remote_dir.endswith('/'):
            self.user_remote_dir = self.user_remote_dir + '/'

        self.datasetpath = cfg_params.get("CMSSW.datasetpath")
        self.publish_data_name = cfg_params.get('USER.publish_data_name','')

        self.pset = cfg_params.get('CMSSW.pset',None)

        self.user_port = cfg_params.get("USER.storage_port",'8443')
        self.user_se_path = cfg_params.get("USER.storage_path",'')
        if self.user_se_path and not self.user_se_path.endswith('/'):
            self.user_se_path = self.user_se_path + '/'

        #check if using "private" Storage
        if not self.node :
            msg = 'Please specify the storage_element name in your crab.cfg section [USER].\n'
            msg +='\tFor further information please visit : %s'%self.stage_out_faq
            raise CrabException(msg)
        # PhEDEx is used only when the storage element is a CMS site name
        if 'T1_' not in self.node and 'T2_' not in self.node and 'T3_' not in self.node:
            self.usePhedex = False

        if not self.usePhedex and ( self.user_remote_dir == '' or self.user_se_path == '' ):
            ####### bug 73010 : ask the user to remove the half created task
            msg = 'Error: task ' + common.work_space._top_dir + ' not correctly created. Please remove it. \n'
            msg += ' You are asking to stage out without using CMS Storage Name convention. In this case you \n'
            msg += ' must specify both user_remote_dir and storage_path in the crab.cfg section [USER].\n'
            msg += ' For further information please visit : \n\t%s'%self.stage_out_faq
            # NOTE(review): the returned task is not used; the call is kept
            # in case it has side effects -- confirm and drop if it has none
            task = common._db.getTask()
            raise CrabException(msg)

        self.forced_path = '/store/user/'
        if self.sched in ['LSF','PBS']:
            self.srm_version = 'direct'
            self.SE = {'LSF':'', 'PBS':''}

        if self.sched == 'CAF':
            # first part of the LFN on CAF, configurable through caf_lfn
            self.forced_path = cfg_params.get("USER.caf_lfn", '/store/caf/user')
            self.SE = {'CAF':'caf.cern.ch'}
            self.srm_version = 'stageout'

        if not self.usePhedex:
            self.forced_path = self.user_remote_dir
        return

    def getEndpoint(self):
        '''
        Return full SE endpoint and related infos

        returns: endpoint, LFN, SE name, SE path, user name
                 (endpoint and LFN always end with a slash)
        '''
        self.lfn = self.getLFN()

        #extract the PFN for the given node,LFN,protocol
        endpoint = self.getStageoutPFN()
        if not endpoint.endswith('/'): endpoint = endpoint + '/'
        ### bug fix 93573
        if not self.lfn.endswith('/'): self.lfn = self.lfn + '/'

        # ${PSETHASH} is appended literally to both the LFN and the endpoint
        if int(self.publish_data) == 1 or int(self.usenamespace) == 1:
            self.lfn = self.lfn + '${PSETHASH}/'
            endpoint = endpoint + '${PSETHASH}/'

        #extract SE name an SE_PATH (needed for publication)
        SE, SE_PATH, User = self.splitEndpoint(endpoint)

        return endpoint, self.lfn , SE, SE_PATH, User

    def splitEndpoint(self, endpoint):
        '''
        Return relevant infos from endpoint

        input:   endpoint as built by getEndpoint
        returns: SE name, path on the SE, user name
        raises:  CrabException if an endpoint which has to be parsed as an
                 URL carries no "scheme://" part
        '''
        SE = ''
        SE_PATH = ''
        USER = getUserName()
        if self.usePhedex:
            if (self.protocol == 'direct' or self.protocol == 'stageout'):
                # local/xrootd like stage out: the SE name comes from the
                # per-scheduler table and the endpoint is used as it is
                SE = self.SE[self.sched]
                SE_PATH = endpoint
            else:
                pieces = endpoint.split('://')
                if len(pieces) < 2:
                    msg = "Unable to extract the storage element path from endpoint %s"%endpoint
                    raise CrabException(msg)
                # urlparse is given an http url just to isolate the host[:port]
                url = 'http://'+pieces[1]
                scheme, host, path, params, query, fragment = urlparse(url)
                SE = self.getAuthoritativeSE()
                # split once only: everything after the first occurrence of
                # the host is kept, even if the same string is in the path too
                SE_PATH = endpoint.split(host, 1)[1]
        else:
            SE = self.node
            SE_PATH = self.user_se_path + self.user_remote_dir
        # NOTE(review): nesting reconstructed from a listing without
        # indentation; this check is assumed to apply to both branches above
        if self.lfn.find('group') != -1:
            # the path component following 'group' replaces the user name
            try:
                USER = (self.lfn.split('group')[1]).split('/')[1]
            except IndexError:
                # nothing follows 'group': keep the default user name
                pass
        return SE, SE_PATH, USER

    def getLFN(self):
        """
        define the LFN composing the needed pieces

        returns: LFN; it ends with a slash when it is built with LFNBase
        raises:  CrabException if publication is requested without
                 publish_data_name
        """
        if not self.usePhedex and (int(self.publish_data) == 0 and int(self.usenamespace) == 0) :
            ### add here check if user is trying to force a wrong LFN using a T2 TODO
            ## check if storage_name is a T2 (siteDB query)
            ## if yes :match self.user_lfn with LFNBaseName...
            ## if NOT : raise (you are using a T2. It's not allowed stage out into self.user_path+self.user_lfn)
            return self.user_remote_dir
        if self.publish_data_name == '' and int(self.publish_data) == 1:
            msg = "Error. The [USER] section does not have 'publish_data_name'\n"
            msg += '\tFor further information please visit : \n\t%s'%self.dataPub_faq
            raise CrabException(msg)
        if self.publish_data_name == '' and int(self.usenamespace) == 1:
            self.publish_data_name = "DefaultDataset"
        if int(self.publish_data) == 1:
            primaryDataset = self.computePrimaryDataset()
            ### publish=True is passed for the publication in order to be
            ### able to check the lfn length
            lfn = LFNBase(self.forced_path, primaryDataset, self.publish_data_name, publish=True)
        elif int(self.usenamespace) == 1:
            primaryDataset = self.computePrimaryDataset()
            lfn = LFNBase(self.forced_path, primaryDataset, self.publish_data_name)
        else:
            lfn = LFNBase(self.forced_path,self.user_remote_dir)

        if not lfn.endswith('/'): lfn = lfn + '/'

        return lfn

    def computePrimaryDataset(self):
        """
        compute the last part for the LFN in case of publication

        returns: the first component of datasetpath, or publish_data_name
                 when datasetpath is 'None' (any case) or not set at all
        """
        if self.datasetpath is not None and self.datasetpath.upper() != 'NONE':
            primarydataset = self.datasetpath.split("/")[1]
        else:
            primarydataset = self.publish_data_name
        return primarydataset

    def domPhedex(self,params,datasvc_baseUrl):
        """
        PhEDEx Data Service call

        input:   params,datasvc_baseUrl
        returns: DOM object with the content of the PhEDEx Data Service call,
                 None if the reply can not be parsed
        raises:  CrabException if the service can not be accessed (IOError)
        """
        params = urllib.urlencode(params)
        try:
            reply = urllib.urlopen(datasvc_baseUrl, params)
            try:
                urlresults = parse(reply)
            finally:
                # do not leave the connection open
                reply.close()
        except IOError:
            msg="Unable to access PhEDEx Data Service at %s"%datasvc_baseUrl
            raise CrabException(msg)
        except Exception:
            # unreadable reply: the callers report the failure on None
            urlresults = None

        return urlresults

    def parse_error(self,urlresults):
        """
        look for errors in the DOM object returned by PhEDEx Data Service call

        returns: None if there is no <error> element, otherwise the text of
                 the first two child nodes of the last <error> element
        """
        errormsg = None
        for error in urlresults.getElementsByTagName('error'):
            text = ''
            for child in error.childNodes[:2]:
                # children without text (e.g. nested elements) are skipped
                text += getattr(child, 'data', '')
            if not text:
                # an empty <error/> is still an error and must be reported
                text = 'unspecified error'
            errormsg = text
        return errormsg

    def parse_lfn2pfn(self,urlresults):
        """
        Parse the content of the result of lfn2pfn PhEDEx Data Service call

        input:   DOM object with the content of the lfn2pfn call
        returns: PFN of the first <mapping> element with a non empty pfn;
                 [] if there is no <phedex> element, None if no pfn is found
        """
        result = urlresults.getElementsByTagName('phedex')

        if not result:
            return []
        for m in result[0].getElementsByTagName('mapping'):
            pfn=m.getAttribute("pfn")
            if pfn:
                return pfn

    def _queryDatasvc(self,params,datasvc_baseUrl,fullurl):
        """
        Call the PhEDEx Data Service and check the reply

        input:   params,datasvc_baseUrl as for domPhedex; fullurl is only
                 quoted in the error messages
        returns: DOM object with the reply
        raises:  CrabException if there is no usable reply or it holds an error
        """
        dom = self.domPhedex(params,datasvc_baseUrl)
        if not dom :
            msg="Unable to get info from %s"%fullurl
            raise CrabException(msg)

        errormsg = self.parse_error(dom)
        if errormsg:
            msg="Error extracting info from %s due to: %s"%(fullurl,errormsg)
            raise CrabException(msg)
        return dom

    def getStageoutPFN( self ):
        """
        input: LFN,node name,protocol
        returns: PFN, always ending with a slash
        raises: CrabException if the PFN can not be obtained from PhEDEx
        """
        if self.usePhedex:
            params = {'node' : self.node , 'lfn': self.lfn , 'protocol': self.protocol}
            datasvc_lfn2pfn="%s/lfn2pfn"%self.datasvc_url
            fullurl="%s/lfn2pfn?node=%s&lfn=%s&protocol=%s"%(self.datasvc_url,self.node,self.lfn,self.protocol)
            domlfn2pfn = self._queryDatasvc(params,datasvc_lfn2pfn,fullurl)

            stageoutpfn = self.parse_lfn2pfn(domlfn2pfn)
            if not stageoutpfn:
                msg ='Unable to get stageout path from TFC at Site %s \n'%self.node
                msg+=' Please alert the CompInfraSup group through their savannah %s \n'%self.FacOps_savannah
                msg+=' reporting: \n'
                msg+=' Summary: Unable to get user stageout from TFC at Site %s \n'%self.node
                msg+=' OriginalSubmission: stageout path is not retrieved from %s \n'%fullurl
                raise CrabException(msg)
        else:
            if self.sched in ['CAF','LSF','PBS'] :
                # savannah bug 98143: no double slash between path and LFN
                if self.user_se_path.endswith('/') and self.lfn.startswith('/'):
                    stageoutpfn = self.user_se_path+(self.lfn).lstrip('/')
                else:
                    stageoutpfn = self.user_se_path+self.lfn
            else:
                stageoutpfn = 'srm://'+self.node+':'+self.user_port+self.user_se_path+self.lfn

        if not stageoutpfn.endswith('/'): stageoutpfn = stageoutpfn + '/'
        return stageoutpfn

    def getAuthoritativeSE(self):
        """
        input: node name
        returns: "se" attribute of the first <node> element which has one;
                 [] if there is no <phedex> element, None if no se is found
        raises: CrabException if the PhEDEx Data Service call fails
        """
        params = {'node' : self.node }
        datasvc_nodes="%s/nodes"%self.datasvc_url
        fullurl="%s/nodes/?node=%s"%(self.datasvc_url,self.node)
        domnodes = self._queryDatasvc(params,datasvc_nodes,fullurl)

        result = domnodes.getElementsByTagName('phedex')
        if not result:
            return []
        for m in result[0].getElementsByTagName('node'):
            se=m.getAttribute("se")
            if se:
                return se
336 afanfani 1.6
337    
338     if __name__ == '__main__':
339     """
340     Sort of unit testing to check Phedex API for whatever site and/or lfn.
341     Usage:
342     python PhEDExDatasvcInfo.py --node T2_IT_Bari --lfn /store/maremma
343    
344     """
345     import getopt,sys
346     from crab_util import *
347     import common
348     klass_name = 'SchedulerGlite'
349     klass = importName(klass_name, klass_name)
350     common.scheduler = klass()
351    
352     lfn="/store/user/"
353     node='T2_IT_Bari'
354     valid = ['node=','lfn=']
355     try:
356     opts, args = getopt.getopt(sys.argv[1:], "", valid)
357     except getopt.GetoptError, ex:
358     print str(ex)
359     sys.exit(1)
360     for o, a in opts:
361     if o == "--node":
362     node = a
363     if o == "--lfn":
364     lfn = a
365    
366     mycfg_params = { 'USER.storage_element': node }
367     dsvc = PhEDExDatasvcInfo(mycfg_params)
368     dsvc.lfn = lfn
369     print dsvc.getStageoutPFN()
370