ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
Revision: 1.3
Committed: Sun Jan 29 01:46:08 2006 UTC (19 years, 3 months ago) by afanfani
Content type: text/x-python
Branch: MAIN
Changes since 1.2: +83 -31 lines
Log Message:
improvements to deal with error conditions (i.e. dataset not existing, invalid data tier, etc.) with the changed DBS API

File Contents

# User Rev Content
1 afanfani 1.1 #!/usr/bin/env python2
2     import sys, os, string, re
3     from DBSInfo import *
4    
5 afanfani 1.3
6 afanfani 1.1 # ####################################
class DataDiscoveryError(Exception):
    """Generic data-discovery failure.

    In this module it is raised by DataDiscovery.fetchDBSInfo to re-wrap
    the errors reported by the DBS client (DBSError and
    DBSInvalidDataTierError).
    """

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        # Use the builtin Exception directly: it is the same object as
        # exceptions.Exception, but does not depend on the 'exceptions'
        # module being leaked into this namespace by a star-import.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the one-element tuple (errorMessage,), so it can be
        # used directly as the argument tuple of the format operator.
        return "%s" % (self.args)
16    
17 afanfani 1.1 # ####################################
class NotExistingDatasetError(Exception):
    """The requested dataset/owner has no content in DBS.

    In this module it is raised by DataDiscovery.fetchDBSInfo when the
    dataset contents returned by DBS are empty.
    """

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        # Use the builtin Exception directly: it is the same object as
        # exceptions.Exception, but does not depend on the 'exceptions'
        # module being leaked into this namespace by a star-import.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the one-element tuple (errorMessage,), so it can be
        # used directly as the argument tuple of the format operator.
        return "%s" % (self.args)
27 afanfani 1.1
28     # ####################################
class NoDataTierinProvenanceError(Exception):
    """A requested data tier is missing from the dataset provenance.

    In this module it is raised by DataDiscovery.checkParentDataTier when
    a user-requested data tier is neither among the parents' data types
    nor the data tier of the dataset itself.
    """

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        # Use the builtin Exception directly: it is the same object as
        # exceptions.Exception, but does not depend on the 'exceptions'
        # module being leaked into this namespace by a star-import.
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the one-element tuple (errorMessage,), so it can be
        # used directly as the argument tuple of the format operator.
        return "%s" % (self.args)
38 afanfani 1.1
39     # ####################################
40     # class to find and extract info from published data
class DataDiscovery:
    """Find and extract information about published data from DBS.

    Typical use: build the object, call fetchDBSInfo() once to query DBS,
    then read the results through getMaxEvents(), getDBSPaths() and
    getFileBlocks().
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """
        Store the request; DBS is not contacted here.

        owner, dataset -- strings used to build the DBS dataset path
        dataTiers      -- list of requested data tier names; the list is
                          kept by reference and may be extended in place
                          by fetchDBSInfo
        cfg_params     -- stored as-is; not read anywhere in this class
        """
        # Attributes
        # 'datatier' is only a placeholder path component: the real data
        # tier is substituted in fetchDBSInfo once DBS has answered.
        self.dbsdataset = '/' + dataset + '/datatier/' + owner
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        self.dbspaths = []    # DBS output: list of dbspaths for all data
        self.allblocks = []   # DBS output: list of fileblock-name lists, one per dataset-owner
        self.blocksinfo = {}  # DBS output: map fileblocks-totevts for the primary block, used internally to this class
        # DBS output: max events is computed on demand by getMaxEvents

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS

        Fills self.blocksinfo, self.dbspaths and self.allblocks for the
        requested dataset and for every parent in its provenance.

        Raises DataDiscoveryError when DBS reports an error,
        NotExistingDatasetError when the dataset has no content in DBS and
        NoDataTierinProvenanceError (through checkParentDataTier) when a
        requested data tier is not in the provenance.
        """
        ## add the PU among the required data tiers if the Digi are requested
        # NOTE: this appends to the very list object received by __init__,
        # so the caller's list is extended too.
        if 'Digi' in self.dataTiers and 'PU' not in self.dataTiers:
            self.dataTiers.append('PU')

        ## get info about the requested dataset
        dbs = DBSInfo(self.dbsdataset, self.dataTiers)
        try:
            self.blocksinfo = dbs.getDatasetContents()
        except DBSError as ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        if len(self.blocksinfo) <= 0:
            msg = "\nERROR Data %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !" % self.dbsdataset
            raise NotExistingDatasetError(msg)

        # the real data tier is taken from the name of the first fileblock
        firstblock = list(self.blocksinfo.keys())[0]
        currentdbsdataset = self._resolveDBSPath(firstblock)

        self.dbspaths.append(currentdbsdataset)    # add the requested dbspath

        ## get info about the parents
        try:
            parents = dbs.getDatasetProvenance()
        except DBSInvalidDataTierError as ex:
            msg = ex.getErrorMessage() + ' \n Check the data_tier variable in crab.cfg !\n'
            raise DataDiscoveryError(msg)
        except DBSError as ex:
            raise DataDiscoveryError(ex.getErrorMessage())

        ## check that the user asks for parent Data Tier really existing in the DBS provenance
        self.checkParentDataTier(parents, self.dataTiers, currentdbsdataset)

        ## for each parent get the corresponding fileblocks
        for aparent in parents:
            ## fill a list of dbspaths
            parentdbsdataset = aparent.getDatasetPath()
            self.dbspaths.append(parentdbsdataset)
            pdbs = DBSInfo(parentdbsdataset, [])
            try:
                parentsblocksinfo = pdbs.getDatasetContents()
            except DBSError as ex:
                raise DataDiscoveryError(ex.getErrorMessage())

            self.allblocks.append(list(parentsblocksinfo.keys()))  # add parent fileblocksinfo

        ## all the required blocks
        self.allblocks.append(list(self.blocksinfo.keys()))  # add also the current fileblocksinfo

    # #################################################
    def _resolveDBSPath(self, fileblock):
        """
        Return self.dbsdataset with the placeholder data tier replaced by
        the data tier found in the given fileblock name.

        Both strings are split on '/' and the data tier is read at index 2,
        i.e. they are assumed to look like '/<dataset>/<tier>/<owner>...'
        (TODO confirm the fileblock naming against DBSInfo).
        """
        realdatatier = fileblock.split('/')[2]
        # Replace only the tier path component: a plain substring replace
        # would also rewrite a dataset or owner name that happens to
        # contain the placeholder text.
        pathparts = self.dbsdataset.split('/')
        pathparts[2] = realdatatier
        return '/'.join(pathparts)

    # #################################################
    def checkParentDataTier(self, parents, user_datatiers, currentdbsdataset):
        """
        check that the data tiers requested by the user really exist in the provenance of the given dataset

        parents           -- objects providing getDataType()
        user_datatiers    -- data tier names requested by the user
        currentdbsdataset -- DBS path of the dataset; its data tier is the
                             component at index 2 when split on '/'

        Raises NoDataTierinProvenanceError for the first requested tier
        that is neither a parent data type nor the current data tier.
        """
        current_datatier = currentdbsdataset.split('/')[2]

        parent_datatypes = [aparent.getDataType() for aparent in parents]

        for datatier in user_datatiers:
            if datatier in parent_datatypes:
                continue
            # the current datatier is not supposed to be in the provenance
            if not (datatier == current_datatier):
                msg = "\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n Check the data_tier variable in crab.cfg !" % (currentdbsdataset, datatier)
                raise NoDataTierinProvenanceError(msg)

    # #################################################
    def getMaxEvents(self):
        """
        max events of the primary dataset-owner

        Returns the sum of the values of self.blocksinfo (0 when empty).
        """
        ## sum over the fileblocks of the primary dataset-owner
        return sum(self.blocksinfo.values())

    # #################################################
    def getDBSPaths(self):
        """
        list the DBSpaths for all required data
        """
        return self.dbspaths

    # #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock

        Not implemented yet: only prints a reminder and returns None.
        """
        # single-argument parenthesised form prints the same text on
        # Python 2 and Python 3
        print("To be used by a more complex job splitting... TODO later... ")
        print("it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo")

    # #################################################
    def getFileBlocks(self):
        """
        fileblocks for all required dataset-owners
        """
        return self.allblocks
165    
166     ########################################################################
167    
168