ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
Revision: 1.7
Committed: Thu Jul 6 10:22:47 2006 UTC (18 years, 9 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_2_0
Branch point for: CRAB_BOSS4
Changes since 1.6: +57 -59 lines
Log Message:
indenting is 4 spaces, not 1, or 2 or a combination of 1,2,4...

File Contents

# User Rev Content
1 gutsche 1.6 #!/usr/bin/env python
2 afanfani 1.1 import sys, os, string, re
3     from DBSInfo import *
4    
5 afanfani 1.3
6 afanfani 1.1 # ####################################
class DataDiscoveryError(Exception):
    """Generic failure while discovering data through DBS.

    Raised by DataDiscovery.fetchDBSInfo when a DBS call fails or when the
    requested owner/dataset pair matches no dataset or more than one.
    """

    def __init__(self, errorMessage):
        """Store *errorMessage* as the single entry of ``self.args``."""
        # The builtin Exception is used directly: the ``exceptions`` module
        # is never imported by this file, and the builtin is the same class.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the 1-tuple (errorMessage,), so the %-format consumes
        # it as its only argument and yields the message text itself.
        return "%s" % (self.args)
16 afanfani 1.3
17 afanfani 1.1 # ####################################
class NotExistingDatasetError(Exception):
    """The requested dataset has no content in DBS.

    Raised by DataDiscovery.fetchDBSInfo when the dataset contents returned
    by DBS are empty.
    """

    def __init__(self, errorMessage):
        """Store *errorMessage* as the single entry of ``self.args``."""
        # The builtin Exception is used directly: the ``exceptions`` module
        # is never imported by this file, and the builtin is the same class.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the 1-tuple (errorMessage,), so the %-format consumes
        # it as its only argument and yields the message text itself.
        return "%s" % (self.args)
27 afanfani 1.1
28     # ####################################
class NoDataTierinProvenanceError(Exception):
    """A requested data tier is missing from the dataset provenance.

    Raised by DataDiscovery.checkParentDataTier when a requested tier is
    neither the tier of the dataset itself nor the type of any parent.
    """

    def __init__(self, errorMessage):
        """Store *errorMessage* as the single entry of ``self.args``."""
        # The builtin Exception is used directly: the ``exceptions`` module
        # is never imported by this file, and the builtin is the same class.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the 1-tuple (errorMessage,), so the %-format consumes
        # it as its only argument and yields the message text itself.
        return "%s" % (self.args)
38 afanfani 1.1
39     # ####################################
40     # class to find and extract info from published data
class DataDiscovery:
    """Find and extract information about published data by querying DBS.

    Typical use: build the object, call fetchDBSInfo() once, then read the
    results through getMaxEvents(), getDBSPaths() and getFileBlocks().
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """Record the request; no DBS call is made here.

        owner, dataset -- passed to DBSInfo.getMatchingDatasets.
        dataTiers      -- list of requested data tiers. The list object is
                          kept by reference, and fetchDBSInfo may append
                          'PU' to it in place.
        cfg_params     -- stored as given; not read by any method of this
                          class.
        """
        # Attributes
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        # Filled in by fetchDBSInfo.
        self.datasets = []      # DBS output: datasets matching owner/dataset
        self.dbsdataset = None  # DBS output: path name of the matched dataset
        self.dbspaths = []      # DBS output: list of dbspaths for all data
        self.allblocks = []     # DBS output: one list of fileblock names per dataset-owner
        self.blocksinfo = {}    # DBS output: map fileblocks-totevts for the primary block;
                                # getMaxEvents sums its values

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS

        Resolves owner/dataset to exactly one DBS dataset, loads its
        fileblocks, then does the same for every parent dataset reported by
        the provenance query for the requested data tiers.

        Raises DataDiscoveryError on any DBSError, or when the owner/dataset
        pair is unknown or ambiguous; NotExistingDatasetError when the
        dataset has no fileblocks; NoDataTierinProvenanceError (through
        checkParentDataTier) when a requested tier is not in the provenance.
        """
        ## add the PU among the required data tiers if the Digi are requested
        if 'Digi' in self.dataTiers and 'PU' not in self.dataTiers:
            self.dataTiers.append('PU')

        ## get info about the requested dataset
        dbs = DBSInfo()
        try:
            self.datasets = dbs.getMatchingDatasets(self.owner, self.dataset)
        except DBSError as ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        nmatches = len(self.datasets)
        if nmatches == 0:
            raise DataDiscoveryError("Owner=%s, Dataset=%s unknown to DBS" % (self.owner, self.dataset))
        if nmatches > 1:
            raise DataDiscoveryError("Owner=%s, Dataset=%s is ambiguous" % (self.owner, self.dataset))

        try:
            self.dbsdataset = self.datasets[0].get('datasetPathName')
            self.blocksinfo = dbs.getDatasetContents(self.dbsdataset)
            # add also the current fileblocksinfo (stored as a real list so
            # it is not a live view of blocksinfo)
            self.allblocks.append(list(self.blocksinfo.keys()))
            self.dbspaths.append(self.dbsdataset)
        except DBSError as ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        if len(self.blocksinfo) <= 0:
            msg = "\nERROR Data for %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !" % self.dbsdataset
            raise NotExistingDatasetError(msg)

        ## get info about the parents
        try:
            parents = dbs.getDatasetProvenance(self.dbsdataset, self.dataTiers)
        except DBSInvalidDataTierError as ex:
            # Listed before DBSError so the more specific hint is used.
            msg = ex.getErrorMessage() + ' \n Check the data_tier variable in crab.cfg !\n'
            raise DataDiscoveryError(msg)
        except DBSError as ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        ## check that the user asks for parent Data Tier really existing in the DBS provenance
        self.checkParentDataTier(parents, self.dataTiers)

        ## for each parent get the corresponding fileblocks
        try:
            for parent in parents:
                ## fill a list of dbspaths
                parentPath = parent.get('parent').get('datasetPathName')
                self.dbspaths.append(parentPath)
                parentBlocks = dbs.getDatasetContents(parentPath)
                # add parent fileblocksinfo
                self.allblocks.append(list(parentBlocks.keys()))
        except DBSError as ex:
            raise DataDiscoveryError(ex.getErrorMessage())

    # #################################################
    def checkParentDataTier(self, parents, dataTiers):
        """
        check that the data tiers requested by the user really exists in the provenance of the given dataset

        parents   -- iterable of mappings; each one's 'type' entry is compared
                     against the requested tiers.
        dataTiers -- iterable of requested tier names.

        Reads self.dbsdataset, so it is only meaningful after fetchDBSInfo
        has resolved the dataset. Raises NoDataTierinProvenanceError for the
        first tier that is neither a parent type nor the dataset's own tier.
        """
        # The dataset's own tier is the third '/'-separated field of its
        # path (index 2; index 0 is empty when the path starts with '/').
        startType = self.dbsdataset.split('/')[2]
        # for example 'type' is PU and 'dataTier' is Hit
        parentTypes = [parent.get('type') for parent in parents]
        for tier in dataTiers:
            if tier not in parentTypes and tier != startType:
                msg = "\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n Check the data_tier variable in crab.cfg !" % (self.dbsdataset, tier)
                raise NoDataTierinProvenanceError(msg)

    # #################################################
    def getMaxEvents(self):
        """
        max events of the primary dataset-owner

        Returns the sum of the per-fileblock event counts in self.blocksinfo
        (0 when it is empty).
        """
        return sum(self.blocksinfo.values())

    # #################################################
    def getDBSPaths(self):
        """
        list the DBSpaths for all required data
        """
        return self.dbspaths

    # #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock

        Not implemented yet: only prints a reminder and returns None.
        """
        print("To be used by a more complex job splitting... TODO later... ")
        print("it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo")

    # #################################################
    def getFileBlocks(self):
        """
        fileblocks for all required dataset-owners
        """
        return self.allblocks
158    
159     ########################################################################