1 |
afanfani |
1.1 |
#!/usr/bin/env python2
|
2 |
|
|
import sys, os, string, re
|
3 |
|
|
from DBSInfo import *
|
4 |
|
|
|
5 |
|
|
# ####################################
|
6 |
|
|
class DataDiscoveryError(Exception):
    """
    Error signalling a failure while accessing Data Discovery.

    Derives from Exception so that it can be raised and caught like any
    other exception (a plain class cannot be raised on Python 3 and is
    missed by ``except Exception`` handlers).

    Creating an instance prints the error banner to stdout, exactly as
    the original class did; the same text is stored as the exception
    message.
    """

    def __init__(self):
        message = '\nERROR accessing Data Discovery\n'
        Exception.__init__(self, message)
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        print(message)
|
10 |
|
|
# ####################################
|
11 |
|
|
class DatasetContentsError(Exception):
    """
    Error signalling a failed getDatasetContents access to Data Discovery.

    Derives from Exception so that it can be raised and caught like any
    other exception (a plain class cannot be raised on Python 3 and is
    missed by ``except Exception`` handlers).

    Creating an instance prints the error banner to stdout, exactly as
    the original class did; the same text is stored as the exception
    message.
    """

    def __init__(self):
        message = '\nERROR accessing Data Discovery : getDatasetContents\n'
        Exception.__init__(self, message)
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        print(message)
|
15 |
|
|
|
16 |
|
|
# ####################################
|
17 |
|
|
class DatasetProvenanceError(Exception):
    """
    Error signalling a failed getDatasetProvenance access to Data Discovery.

    Derives from Exception so that it can be raised and caught like any
    other exception (a plain class cannot be raised on Python 3 and is
    missed by ``except Exception`` handlers).

    Creating an instance prints the error banner to stdout, exactly as
    the original class did; the same text is stored as the exception
    message.
    """

    def __init__(self):
        message = '\nERROR accessing Data Discovery : getDatasetProvenance\n'
        Exception.__init__(self, message)
        # Single-argument parenthesised print emits the same text on
        # Python 2 and Python 3.
        print(message)
|
21 |
|
|
|
22 |
|
|
# ####################################
|
23 |
|
|
# class to find and extract info from published data
|
24 |
|
|
class DataDiscovery:
    """
    Find and extract information about published data by querying DBS.

    Typical use: build the object, call fetchDBSInfo() once, then read
    the results through getMaxEvents(), getDBSPaths() and getFileBlocks().
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """
        Store the query parameters and initialise the (empty) DBS results.

        owner, dataset -- combined into the DBS dataset string
                          dataset + '/datatier/' + owner
        dataTiers      -- passed unchanged to DBSInfo for the primary dataset
        cfg_params     -- stored on the instance; not read by this class
        """
        # Attributes
        self.dbsdataset = dataset + '/datatier/' + owner
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        self.dbspaths = []    # DBS output: list of dbspaths for all data
        self.allblocks = []   # DBS output: one list of fileblock names per dataset-owner (parents first, primary last)
        self.blocksinfo = {}  # DBS output: map fileblocks-totevts for the primary block, used internally to this class
        # The max number of events is not stored here: getMaxEvents()
        # computes it from blocksinfo on demand.

# ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS

        Fills blocksinfo with the contents of the primary dataset, then,
        for every parent reported by getDatasetProvenance(), records the
        parent dbspath and the names of its fileblocks.  The primary
        fileblock names are appended to allblocks last.

        Results are appended to dbspaths and allblocks, which are never
        reset, so calling this method more than once accumulates
        duplicate entries.

        Raises DataDiscoveryError when a DBS call fails.
        KeyboardInterrupt and SystemExit are propagated unchanged.
        """
        self.dbspaths.append("/" + self.dbsdataset)  # add the primary dbspath
        # it might be replaced if one gets the primary dbspath from DBSAPI as well

        dbs = DBSInfo(self.dbsdataset, self.dataTiers)
        try:
            self.blocksinfo = dbs.getDatasetContents()
        # NOTE(review): DBSError is looked up as an attribute of the DBSInfo
        # instance; confirm it is defined there (it may instead be a
        # module-level name of the DBSInfo module).
        except dbs.DBSError:
            raise DataDiscoveryError
        try:
            parents = dbs.getDatasetProvenance()
        except (KeyboardInterrupt, SystemExit):
            # never disguise a user interrupt / interpreter exit as a DBS failure
            raise
        except:
            # deliberately bare: also catches errors that do not derive
            # from Exception (e.g. old-style exception classes on Python 2)
            raise DataDiscoveryError

        ## for each parent get the corresponding fileblocks
        for aparent in parents:
            ## fill a list of dbspaths
            parentdbsdataset = aparent.getDatasetPath()
            self.dbspaths.append(parentdbsdataset)
            # Split once; element 0 is whatever precedes the first '/',
            # elements 1-3 are used as dataset / datatier / owner.
            pathparts = parentdbsdataset.split('/')
            pdataset = pathparts[1]
            pdt = pathparts[2]
            powner = pathparts[3]
            tmppath = pdataset + '/' + pdt + '/' + powner
            ## get the fileblocks of the parents : FIXME for the time being the first / in the path has to be removed
            pdbs = DBSInfo(tmppath, [])
            try:
                parentsblocksinfo = pdbs.getDatasetContents()
            except (KeyboardInterrupt, SystemExit):
                raise
            except:
                raise DataDiscoveryError

            # list() keeps the stored value a plain list on Python 3 as well
            self.allblocks.append(list(parentsblocksinfo.keys()))  # add parent fileblocksinfo

        ## all the required blocks
        self.allblocks.append(list(self.blocksinfo.keys()))  # add also the primary fileblocksinfo

# #################################################
    def getMaxEvents(self):
        """
        max events of the primary dataset-owner

        Returns the sum of the event counts of all fileblocks in
        blocksinfo (0 when blocksinfo is empty).
        """
        ## total over the fileblocks of the primary dataset-owner
        return sum(self.blocksinfo.values())

# #################################################
    def getDBSPaths(self):
        """
        list the DBSpaths for all required data
        """
        return self.dbspaths

# #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock

        Not implemented yet: only prints a reminder and returns None.
        """
        print("To be used by a more complex job splitting... TODO later... ")
        print("it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo")

# #################################################
    def getFileBlocks(self):
        """
        fileblocks for all required dataset-owners

        Returns allblocks: a list holding one list of fileblock names
        per dataset-owner.
        """
        return self.allblocks
|
113 |
|
|
|
114 |
|
|
########################################################################
|
115 |
|
|
|
116 |
|
|
|