1 |
afanfani |
1.1 |
#!/usr/bin/env python2
|
2 |
|
|
import sys, os, string, re
|
3 |
|
|
from DBSInfo import *
|
4 |
|
|
|
5 |
afanfani |
1.3 |
|
6 |
afanfani |
1.1 |
# ####################################
class DataDiscoveryError(Exception):
    """
    Generic error raised when contacting DBS fails during data discovery.

    Fix: derive from the builtin Exception instead of exceptions.Exception —
    they are the same class in Python 2, but the 'exceptions' module name was
    never explicitly imported here (it only worked via 'from DBSInfo import *').
    """
    def __init__(self, errorMessage):
        args = errorMessage
        Exception.__init__(self, args)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args)
17 |
afanfani |
1.1 |
# ####################################
class NotExistingDatasetError(Exception):
    """
    Raised when the requested dataset/owner pair does not exist in DBS.

    Fix: derive from the builtin Exception instead of exceptions.Exception —
    they are the same class in Python 2, but the 'exceptions' module name was
    never explicitly imported here (it only worked via 'from DBSInfo import *').
    """
    def __init__(self, errorMessage):
        args = errorMessage
        Exception.__init__(self, args)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args)
28 |
|
|
# ####################################
class NoDataTierinProvenanceError(Exception):
    """
    Raised when a user-requested data tier is absent from the dataset provenance.

    Fix: derive from the builtin Exception instead of exceptions.Exception —
    they are the same class in Python 2, but the 'exceptions' module name was
    never explicitly imported here (it only worked via 'from DBSInfo import *').
    """
    def __init__(self, errorMessage):
        args = errorMessage
        Exception.__init__(self, args)

    def getErrorMessage(self):
        """ Return exception error """
        return "%s" % (self.args)
39 |
|
|
# ####################################
# Class to find and extract info from published data.
class DataDiscovery:
    """
    Query DBS for a published dataset/owner pair and collect the
    dbspaths and the fileblock -> total-events maps needed downstream.

    Legacy Python 2 code; kept byte-identical (only documentation added).
    Depends on DBSInfo/DBSError/DBSInvalidDataTierError from DBSInfo.
    """
    def __init__(self, owner, dataset, dataTiers, cfg_params):

        # Attributes
        # NOTE(review): 'datatier' is a literal placeholder in the path; the
        # real tier is substituted in fetchDBSInfo() once DBS answers.
        self.dbsdataset='/'+dataset+'/datatier/'+owner
        self.dataTiers = dataTiers    # data tiers requested by the user
        self.cfg_params = cfg_params  # crab.cfg parameters (not read in this class)

        self.dbspaths= []     # DBS output: list of dbspaths for all data
        self.allblocks = []   # DBS output: list of map fileblocks-totevts for all dataset-owners
        self.blocksinfo = {}  # DBS output: map fileblocks-totevts for the primary block, used internally to this class
        #DBS output: max events computed by method getMaxEvents

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS and fill self.dbspaths, self.allblocks and
        self.blocksinfo for the requested dataset and all of its parents.

        Raises DataDiscoveryError, NotExistingDatasetError or (indirectly)
        NoDataTierinProvenanceError on failure.
        """
        parents = []
        parentsblocksinfo = {}

        ## add the PU among the required data tiers if the Digi are requested
        # (bitwise & acts as logical AND here: both operands are booleans)
        if (self.dataTiers.count('Digi')>0) & (self.dataTiers.count('PU')<=0) :
            self.dataTiers.append('PU')

        ## get info about the requested dataset
        dbs=DBSInfo(self.dbsdataset,self.dataTiers)
        try:
            self.blocksinfo=dbs.getDatasetContents()
        except DBSError, ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        if len(self.blocksinfo)<=0:
            msg="\nERROR Data %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !"%self.dbsdataset
            raise NotExistingDatasetError(msg)

        # Replace the fake 'datatier' placeholder in the dbspath with the
        # tier actually published in DBS (taken from the first block's path).
        currentdatatier=string.split(self.blocksinfo.keys()[0],'/')[2]
        fakedatatier=string.split(self.dbsdataset,'/')[2]
        currentdbsdataset=string.replace(self.dbsdataset, fakedatatier, currentdatatier)

        self.dbspaths.append(currentdbsdataset)   # add the requested dbspath

        ## get info about the parents
        try:
            parents=dbs.getDatasetProvenance()
        except DBSInvalidDataTierError, ex:
            msg=ex.getErrorMessage()+' \n Check the data_tier variable in crab.cfg !\n'
            raise DataDiscoveryError(msg)
        except DBSError, ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        ## check that the user asks for parent Data Tier really existing in the DBS provenance
        self.checkParentDataTier(parents, self.dataTiers, currentdbsdataset)

        ## for each parent get the corresponding fileblocks
        for aparent in parents:
            ## fill a list of dbspaths
            parentdbsdataset=aparent.getDatasetPath()
            self.dbspaths.append(parentdbsdataset)
            # query DBS again for the parent path, with no tier restriction
            pdbs=DBSInfo(parentdbsdataset,[])
            try:
                parentsblocksinfo=pdbs.getDatasetContents()
            except DBSError, ex:
                raise DataDiscoveryError(ex.getErrorMessage())

            self.allblocks.append(parentsblocksinfo.keys())  # add parent fileblocksinfo

        ## all the required blocks
        self.allblocks.append(self.blocksinfo.keys())  # add also the current fileblocksinfo

    # #################################################
    def checkParentDataTier(self, parents, user_datatiers, currentdbsdataset ):
        """
        check that the data tiers requested by the user really exists in the provenance of the given dataset

        A requested tier is acceptable if it is either the current dataset's
        own tier or appears among the parents' data types; otherwise
        NoDataTierinProvenanceError is raised.
        """

        current_datatier=string.split(currentdbsdataset,'/')[2]

        parent_datatypes=[]
        for aparent in parents:
            parent_datatypes.append(aparent.getDataType())

        for datatier in user_datatiers:
            if parent_datatypes.count(datatier)<=0:
                # the current datatier is not supposed to be in the provenance
                if not (datatier == current_datatier):
                    msg="\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n Check the data_tier variable in crab.cfg !"%(currentdbsdataset,datatier)
                    raise NoDataTierinProvenanceError(msg)

    # #################################################
    def getMaxEvents(self):
        """
        max events of the primary dataset-owner

        Sums the per-fileblock event counts gathered by fetchDBSInfo().
        """
        ## loop over the fileblocks of the primary dataset-owner
        nevts=0
        for blockevts in self.blocksinfo.values():
            nevts=nevts+blockevts

        return nevts

    # #################################################
    def getDBSPaths(self):
        """
        list the DBSpaths for all required data
        """
        return self.dbspaths

    # #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock

        Not implemented: only prints a reminder message.
        """
        print "To be used by a more complex job splitting... TODO later... "
        print "it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo"

    # #################################################
    def getFileBlocks(self):
        """
        fileblocks for all required dataset-owners
        """
        return self.allblocks

########################################################################
|
167 |
|
|
|
168 |
|
|
|