ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/PhEDExDatasvcInfo.py
Revision: 1.9
Committed: Fri Oct 10 14:38:00 2008 UTC (16 years, 6 months ago) by spiga
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_2_4_0_pre8
Changes since 1.8: +1 -1 lines
Log Message:
syntax

File Contents

# Content
1 from Actor import *
2 import urllib
3 from xml.dom.minidom import parse
4 from crab_exceptions import *
5 from crab_logger import Logger
6 from WorkSpace import *
7 from urlparse import urlparse
8 from LFNBaseName import *
9
class PhEDExDatasvcInfo:
    """
    Work out where a job's output has to be staged out.

    For a storage element whose name contains 'T1_', 'T2_' or 'T3_' the
    PFN is requested from the PhEDEx Data Service (lfn2pfn call).  For any
    other ("private") storage element the PFN is assembled locally from
    USER.storage_port, USER.storage_path and USER.lfn.
    """

    def __init__(self, cfg_params):
        """
        Read the stage-out related settings from cfg_params.

        input:  cfg_params, a dict-like object (only .get() is used) with
                the 'USER.*' and 'CMSSW.datasetpath' keys
        raises: CrabException if USER.storage_element is missing, or if a
                private storage element is used without both USER.lfn and
                USER.storage_path
        """
        ## PhEDEx Data Service URL
        url = "https://cmsweb.cern.ch/phedex/datasvc/xml/prod"
        self.datasvc_url = cfg_params.get("USER.datasvc_url", url)

        self.FacOps_savannah = 'https://savannah.cern.ch/support/?func=additem&group=cmscompinfrasup'

        self.srm_version = cfg_params.get("USER.srm_version", 'srmv2')
        self.node = cfg_params.get('USER.storage_element', None)

        self.publish_data = cfg_params.get("USER.publish_data", 0)
        self.usenamespace = cfg_params.get("USER.usenamespace", 0)
        self.user_remote_dir = self._withTrailingSlash(cfg_params.get("USER.remote_dir", ''))

        self.datasetpath = cfg_params.get("CMSSW.datasetpath")
        self.publish_data_name = cfg_params.get('USER.publish_data_name', '')

        self.user_lfn = self._withTrailingSlash(cfg_params.get("USER.lfn", ''))

        self.user_port = cfg_params.get("USER.storage_port", '8443')
        self.user_se_path = self._withTrailingSlash(cfg_params.get("USER.storage_path", ''))

        # Without a storage element nothing below can work; fail with a
        # readable message instead of an AttributeError on None.
        if self.node is None:
            msg = "Error. The [USER] section does not have 'storage_element'"
            raise CrabException(msg)

        #check if using "private" Storage
        self.usePhedex = False
        for tier in ('T1_', 'T2_', 'T3_'):
            if self.node.find(tier) != -1:
                self.usePhedex = True
        if not self.usePhedex and (self.user_lfn == '' or self.user_se_path == ''):
            msg = 'You are asking to stage out without using CMS Storage Name convention. In this case you \n'
            msg += ' must specify both lfn and storage_path in the crab.cfg section [USER].\n '
            msg += ' For further information please visit: ADD_TWIKI_LINK'
            raise CrabException(msg)
        self.sched = common.scheduler.name().upper()
        # CAF and LSF schedulers use the 'direct' protocol, everything else
        # uses the configured SRM version.
        self.protocol = self.srm_version
        if self.sched in ['CAF', 'LSF']:
            self.protocol = 'direct'

    def _withTrailingSlash(self, path):
        """
        Return path with a '/' appended unless it is empty or already ends
        with one.
        """
        if path and path[-1] != '/':
            return path + '/'
        return path

    def _userFromPath(self, text):
        """
        Return the path component that follows the first 'user' token in
        text (e.g. 'bob' for '/store/user/bob/'), or '' when there is none.
        """
        if not text:
            return ''
        try:
            return text.split('user')[1].split('/')[1]
        except IndexError:
            # no 'user' token, or nothing path-like after it
            return ''

    def getEndpoint(self):
        '''
        Return full SE endpoint and related infos

        returns: (endpoint, lfn, SE, SE_PATH, User)
        '''
        self.lfn = self.getLFN()

        #extract the PFN for the given node,LFN,protocol
        endpoint = self.getStageoutPFN()

        #extract SE name an SE_PATH (needed for publication)
        SE, SE_PATH, User = self.splitEndpoint(endpoint)

        return endpoint, self.lfn, SE, SE_PATH, User

    def splitEndpoint(self, endpoint):
        '''
        Return relevant infos from endpoint

        input:   endpoint (PFN) as returned by getStageoutPFN
        returns: (SE, SE_PATH, USER); USER is '' when it cannot be found
        raises:  CrabException if an SRM endpoint has no host part
        '''
        SE = ''
        SE_PATH = ''
        USER = ''
        if self.usePhedex:
            if self.protocol == 'direct':
                ### FEDE added SE ###
                SE = self.sched
                SE_PATH = endpoint
                USER = self._userFromPath(endpoint)
            else:
                # Swap the scheme for http so that urlparse splits host,
                # path and query on every python version.
                url = 'http://' + endpoint.split('://', 1)[-1]
                # python > 2.4
                # SE = urlparse(url).hostname
                scheme, host, path, params, query, fragment = urlparse(url)
                if not host:
                    msg = "Unable to extract the SE host name from endpoint %s" % endpoint
                    raise CrabException(msg)
                SE = host.split(':')[0]
                # maxsplit=1: keep the whole remainder even if the host
                # string shows up again inside the path
                SE_PATH = endpoint.split(host, 1)[1]
                # The user is normally inside the query (?SFN=...); fall
                # back to the path for endpoints without a query string.
                USER = self._userFromPath(query) or self._userFromPath(path)
        else:
            SE = self.node
            SE_PATH = self.user_se_path + self.user_lfn
            USER = self._userFromPath(getattr(self, 'lfn', ''))

        return SE, SE_PATH, USER

    def getLFN(self):
        """
        define the LFN composing the needed pieces

        returns: the LFN string
        raises:  CrabException if publication is requested without
                 USER.publish_data_name
        """
        publish = int(self.publish_data)
        namespace = int(self.usenamespace)
        if not self.usePhedex and publish == 0 and namespace == 0:
            ### add here check if user is trying to force a wrong LFN using a T2 TODO
            ## check if storage_name is a T2 (siteDB query)
            ## if yes :match self.user_lfn with LFNBaseName...
            ## if NOT : raise (you are using a T2. It's not allowed stage out into self.user_path+self.user_lfn)
            return self.user_lfn
        if self.publish_data_name == '' and publish == 1:
            msg = "Error. The [USER] section does not have 'publish_data_name'"
            raise CrabException(msg)
        if self.publish_data_name == '' and namespace == 1:
            self.publish_data_name = "DefaultDataset"
        if publish == 1 or namespace == 1:
            l_User = self.sched in ['CAF']
            primaryDataset = self.computePrimaryDataset()
            lfn = LFNBase(primaryDataset, self.publish_data_name, LocalUser=l_User) + '/${PSETHASH}/'
        else:
            l_User = self.sched in ['CAF', 'LSF']
            lfn = LFNBase(self.user_remote_dir, LocalUser=l_User)
        return lfn

    def computePrimaryDataset(self):
        """
        compute the last part for the LFN in case of publication

        returns: the second '/'-separated field of CMSSW.datasetpath, or
                 publish_data_name when datasetpath is unset or 'NONE'
        """
        datasetpath = self.datasetpath
        # A missing datasetpath is handled like an explicit 'NONE'.
        if datasetpath is None or datasetpath.upper() == 'NONE':
            return self.publish_data_name
        return datasetpath.split("/")[1]

    def lfn2pfn(self):
        """
        PhEDEx Data Service lfn2pfn call

        input: LFN,node name,protocol
        returns: DOM object with the content of the PhEDEx Data Service call,
                 or None if the service cannot be reached or its answer
                 cannot be parsed
        """
        params = {'node': self.node, 'lfn': self.lfn, 'protocol': self.protocol}
        params = urllib.urlencode(params)
        datasvc_lfn2pfn = "%s/lfn2pfn" % self.datasvc_url
        try:
            response = urllib.urlopen(datasvc_lfn2pfn, params)
        except IOError:
            # connection problems are reported like an unparsable answer
            return None
        # nested try blocks: try/except/finally in one statement is not
        # available on old python 2 versions
        try:
            try:
                return parse(response)
            except Exception:
                return None
        finally:
            response.close()

    def parse_error(self, urlresults):
        """
        look for errors in the DOM object returned by PhEDEx Data Service call

        returns: the text of all <error> elements joined by newlines, or
                 None when the document has no <error> element
        """
        messages = []
        for error in urlresults.getElementsByTagName('error'):
            text = ''
            for child in error.childNodes:
                # element children carry no .data; only text-like nodes do
                text += getattr(child, 'data', '')
            # an empty <error/> is still an error: keep the result truthy
            messages.append(text or 'unspecified error')
        if not messages:
            return None
        return '\n'.join(messages)

    def parse_lfn2pfn(self, urlresults):
        """
        Parse the content of the result of lfn2pfn PhEDEx Data Service call

        input: DOM object with the content of the lfn2pfn call
        returns: PFN of the first <mapping> with a non-empty pfn attribute;
                 None if there is no such mapping; [] if the document has
                 no <phedex> element
        """
        result = urlresults.getElementsByTagName('phedex')
        if not result:
            return []
        for mapping in result[0].getElementsByTagName('mapping'):
            pfn = mapping.getAttribute("pfn")
            if pfn:
                return pfn
        return None

    def getStageoutPFN(self):
        """
        input: LFN,node name,protocol
        returns: PFN
        raises: CrabException if the PhEDEx Data Service gives no usable
                answer
        """
        if not self.usePhedex:
            # private storage: the PFN is built from the user settings
            return 'srm://' + self.node + ':' + self.user_port + self.user_se_path + self.lfn

        # only used to make the error messages below reproducible by hand
        fullurl = "%s/lfn2pfn?node=%s&lfn=%s&protocol=%s" % (self.datasvc_url, self.node, self.lfn, self.protocol)
        domlfn2pfn = self.lfn2pfn()
        if not domlfn2pfn:
            msg = "Unable to get info from %s" % fullurl
            raise CrabException(msg)

        errormsg = self.parse_error(domlfn2pfn)
        if errormsg:
            msg = "Error extracting info from %s due to: %s" % (fullurl, errormsg)
            raise CrabException(msg)

        stageoutpfn = self.parse_lfn2pfn(domlfn2pfn)
        if not stageoutpfn:
            msg = 'Unable to get stageout path from TFC at Site %s \n' % self.node
            msg += ' Please alert the CompInfraSup group through their savannah %s \n' % self.FacOps_savannah
            msg += ' reporting: \n'
            msg += ' Summary: Unable to get user stageout from TFC at Site %s \n' % self.node
            msg += ' OriginalSubmission: stageout path is not retrieved from %s \n' % fullurl
            raise CrabException(msg)

        return stageoutpfn
217
218
219
if __name__ == '__main__':
    # Sort of unit testing to check Phedex API for whatever site and/or lfn.
    # Usage:
    #   python PhEDExDatasvcInfo.py --node T2_IT_Bari --lfn /store/maremma
    import getopt
    import sys
    from crab_util import *
    import common

    # The class under test asks common.scheduler for its name, so a
    # scheduler instance has to be installed first.
    scheduler_class = 'SchedulerGlite'
    common.scheduler = importName(scheduler_class, scheduler_class)()

    # Defaults, overridden by the matching command line option.
    settings = {'--node': 'T2_IT_Bari', '--lfn': "/store/user/"}
    try:
        parsed, leftover = getopt.getopt(sys.argv[1:], "", ['node=', 'lfn='])
    except getopt.GetoptError:
        print(str(sys.exc_info()[1]))
        sys.exit(1)
    for flag, value in parsed:
        if flag in settings:
            settings[flag] = value

    checker = PhEDExDatasvcInfo({'USER.storage_element': settings['--node']})
    checker.lfn = settings['--lfn']
    print(checker.getStageoutPFN())
252