1 |
#!/usr/bin/env python2
|
2 |
import sys, os, string, re
|
3 |
import urllib
|
4 |
import urllib2
|
5 |
from DBSInfo import *
|
6 |
|
7 |
# ####################################
class DataDiscoveryError(Exception):
    """Raised when the Data Discovery (DBS) service cannot be accessed.

    Deriving from Exception (instead of a plain old-style class) makes the
    class a real exception type, so `raise DataDiscoveryError` and
    `except DataDiscoveryError` work under new-style exception semantics.
    """
    def __init__(self):
        # Preserve the original user-facing diagnostic printed on construction.
        print('\nERROR accessing Data Discovery\n')
12 |
|
13 |
# ####################################
# class to find and extract info from published data
class DataDiscovery:
    """
    Find and extract info about published data: queries DBS for the
    primary dataset/owner pair and for all of its parents, collecting
    the fileblocks of every required dataset-owner.
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """
        Args:
            owner:      owner name of the primary dataset
            dataset:    primary dataset name
            dataTiers:  list of data tiers to request from DBS
            cfg_params: configuration parameters (stored for callers; not read here)
        """
        # Attributes
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        self.dataset_owner = {}  # DBS output: map owner -> dataset for all data
        self.allblocks = []      # DBS output: list of fileblock-name lists, one per dataset-owner
        self.blocksinfo = {}     # DBS output: map fileblock -> totevts for the primary block (internal)

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS for the primary dataset and all of its parents.

        Fills self.blocksinfo, self.dataset_owner and self.allblocks.
        Raises DataDiscoveryError when DBS cannot be contacted.
        """
        parents = {}
        self.dataset_owner[self.owner] = self.dataset  # add the map dataset-owner

        dbs = DBSInfo(self.owner, self.dataset, self.dataTiers)
        try:
            self.blocksinfo = dbs.getDatasetContents()
        except Exception:
            # NOTE(review): was a bare `except:`; narrowed so that
            # SystemExit/KeyboardInterrupt still propagate.
            raise DataDiscoveryError

        try:
            parents = dbs.getDatasetProvenance()
        except Exception:
            raise DataDiscoveryError

        ## for each parent get the corresponding fileblocks
        for aparent in parents:
            ## fill the map dataset-owner for the parents
            # parent path looks like /<dataset>/<tier>/<owner>: split it once
            parts = aparent.split('/')
            pdataset = parts[1]
            powner = parts[3]
            self.dataset_owner[powner] = pdataset
            ## get the fileblocks of the parents
            pdbs = DBSInfo(powner, pdataset, [])
            parentsblocksinfo = pdbs.getDatasetContents()
            self.allblocks.append(list(parentsblocksinfo.keys()))  # add parent fileblocksinfo

        ## all the required blocks
        self.allblocks.append(list(self.blocksinfo.keys()))  # add also the primary fileblocksinfo

    # #################################################
    def getMaxEvents(self):
        """
        Return the total events of the primary dataset-owner:
        the sum over all of its fileblocks.
        """
        return sum(self.blocksinfo.values())

    # #################################################
    def getDatasetOwnerPairs(self):
        """
        Return the map of all required dataset-owner pairs.
        """
        return self.dataset_owner

    # #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock
        """
        print("To be used by a more complex job splitting... TODO later... ")
        print("it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo")

    # #################################################
    def getFileBlocks(self):
        """
        Return the fileblocks for all required dataset-owners.
        """
        return self.allblocks
|
97 |
|
98 |
########################################################################
|
99 |
|
100 |
|