ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
Revision: 1.6
Committed: Sun May 28 02:20:13 2006 UTC (18 years, 11 months ago) by gutsche
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_2_0_pre9, CRAB_1_2_0_pre8, CRAB_1_2_0_pre7, post_cmssw_integration_20060527
Changes since 1.5: +1 -1 lines
Log Message:
Integrate CMSSW: change used python from python2 to python

File Contents

# User Rev Content
1 gutsche 1.6 #!/usr/bin/env python
2 afanfani 1.1 import sys, os, string, re
3     from DBSInfo import *
4    
5 afanfani 1.3
6 afanfani 1.1 # ####################################
class DataDiscoveryError(Exception):
    """Generic data-discovery failure.

    In this module it is raised by DataDiscovery.fetchDBSInfo when a DBS
    call fails, or when the requested owner/dataset is unknown or ambiguous.

    The base class is the built-in Exception (the same object Python 2
    exposes as exceptions.Exception), so the class does not depend on an
    ``exceptions`` name leaking in through ``from DBSInfo import *``.
    """

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the 1-tuple (errorMessage,); format its only element.
        return "%s" % (self.args[0],)
16    
17 afanfani 1.1 # ####################################
class NotExistingDatasetError(Exception):
    """The requested dataset has no content in DBS.

    In this module it is raised by DataDiscovery.fetchDBSInfo when the
    dataset contents returned by DBS are empty.

    The base class is the built-in Exception (the same object Python 2
    exposes as exceptions.Exception), so the class does not depend on an
    ``exceptions`` name leaking in through ``from DBSInfo import *``.
    """

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the 1-tuple (errorMessage,); format its only element.
        return "%s" % (self.args[0],)
27 afanfani 1.1
28     # ####################################
class NoDataTierinProvenanceError(Exception):
    """A requested data tier is missing from the dataset provenance.

    In this module it is raised by DataDiscovery.checkParentDataTier when a
    requested tier is neither the dataset's own tier nor one of its parents'.

    The base class is the built-in Exception (the same object Python 2
    exposes as exceptions.Exception), so the class does not depend on an
    ``exceptions`` name leaking in through ``from DBSInfo import *``.
    """

    def __init__(self, errorMessage):
        """Store errorMessage as the single exception argument."""
        Exception.__init__(self, errorMessage)

    def getErrorMessage(self):
        """ Return exception error """
        # self.args is the 1-tuple (errorMessage,); format its only element.
        return "%s" % (self.args[0],)
38 afanfani 1.1
39     # ####################################
40     # class to find and extract info from published data
class DataDiscovery:
    """Find and extract information about published data through DBSInfo.

    Typical use: construct, call fetchDBSInfo() once, then read the results
    with getMaxEvents(), getDBSPaths() and getFileBlocks().
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """Remember the request and initialise empty result containers.

        owner, dataset -- forwarded to DBSInfo.getMatchingDatasets
        dataTiers      -- list of requested data tiers; the list is stored by
                          reference and fetchDBSInfo may append 'PU' to it
        cfg_params     -- stored as-is; not read anywhere in this class
        """
        # Attributes
        self.owner = owner
        self.dataset = dataset
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        self.dbspaths = []    # DBS output: list of dbspaths for all data
        self.allblocks = []   # DBS output: one list of fileblock names per dbspath
        self.blocksinfo = {}  # DBS output: map fileblocks-totevts for the primary block, used internally to this class
        # The max number of events is not stored: getMaxEvents computes it.

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS

        Fills self.datasets, self.dbsdataset, self.blocksinfo, self.dbspaths
        and self.allblocks for the requested dataset and for its parents.

        Raises DataDiscoveryError when a DBS call fails or when the
        owner/dataset pair matches zero or several datasets,
        NotExistingDatasetError when the dataset has no content, and
        NoDataTierinProvenanceError (via checkParentDataTier) when a
        requested data tier is not available.
        """
        # Start from empty result lists so that calling this method again
        # does not duplicate every entry. Cleared in place to keep the
        # identity of lists already handed out by the getters.
        del self.dbspaths[:]
        del self.allblocks[:]

        ## add the PU among the required data tiers if the Digi are requested
        if 'Digi' in self.dataTiers and 'PU' not in self.dataTiers:
            self.dataTiers.append('PU')

        ## get info about the requested dataset
        dbs = DBSInfo()
        try:
            self.datasets = dbs.getMatchingDatasets(self.owner, self.dataset)
        except DBSError:
            # sys.exc_info() is used instead of "except X, ex" / "except X as ex"
            # so the module parses on every Python version.
            raise DataDiscoveryError(sys.exc_info()[1].getErrorMessage())
        if len(self.datasets) == 0:
            raise DataDiscoveryError("Owner=%s, Dataset=%s unknown to DBS" % (self.owner, self.dataset))
        if len(self.datasets) > 1:
            raise DataDiscoveryError("Owner=%s, Dataset=%s is ambiguous" % (self.owner, self.dataset))

        try:
            self.dbsdataset = self.datasets[0].get('datasetPathName')
            self.blocksinfo = dbs.getDatasetContents(self.dbsdataset)
            # add also the current fileblocksinfo
            self.allblocks.append(list(self.blocksinfo.keys()))
            self.dbspaths.append(self.dbsdataset)
        except DBSError:
            raise DataDiscoveryError(sys.exc_info()[1].getErrorMessage())

        if len(self.blocksinfo) <= 0:
            msg = "\nERROR Data for %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !" % self.dbsdataset
            raise NotExistingDatasetError(msg)

        ## get info about the parents
        try:
            parents = dbs.getDatasetProvenance(self.dbsdataset, self.dataTiers)
        except DBSInvalidDataTierError:
            # Must stay ahead of the DBSError handler so the data_tier hint
            # is added whenever this more specific error is raised.
            msg = sys.exc_info()[1].getErrorMessage() + ' \n Check the data_tier variable in crab.cfg !\n'
            raise DataDiscoveryError(msg)
        except DBSError:
            raise DataDiscoveryError(sys.exc_info()[1].getErrorMessage())

        ## check that the user asks for parent Data Tier really existing in the DBS provenance
        self.checkParentDataTier(parents, self.dataTiers)

        ## for each parent get the corresponding fileblocks
        try:
            for parent in parents:
                ## fill a list of dbspaths
                parentPath = parent.get('parent').get('datasetPathName')
                self.dbspaths.append(parentPath)
                parentBlocks = dbs.getDatasetContents(parentPath)
                # add parent fileblocksinfo
                self.allblocks.append(list(parentBlocks.keys()))
        except DBSError:
            raise DataDiscoveryError(sys.exc_info()[1].getErrorMessage())

    # #################################################
    def checkParentDataTier(self, parents, dataTiers):
        """
        check that the data tiers requested by the user really exists in the provenance of the given dataset

        parents   -- sequence of objects whose get('type') gives a tier name
        dataTiers -- iterable of requested tier names

        Reads self.dbsdataset, so it must be set before the call.
        Raises NoDataTierinProvenanceError for the first requested tier that
        is neither the dataset's own tier nor the 'type' of any parent.
        """
        # The dataset's own tier is the third '/'-separated field of its path
        # (the first field is empty when the path starts with '/').
        startType = self.dbsdataset.split('/')[2]
        # for example 'type' is PU and 'dataTier' is Hit
        parentTypes = [parent.get('type') for parent in parents]
        for tier in dataTiers:
            if tier not in parentTypes and tier != startType:
                msg = "\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n Check the data_tier variable in crab.cfg !" % (self.dbsdataset, tier)
                raise NoDataTierinProvenanceError(msg)

    # #################################################
    def getMaxEvents(self):
        """
        max events of the primary dataset-owner

        Returns the sum of the values of self.blocksinfo (0 when it is empty).
        """
        return sum(self.blocksinfo.values())

    # #################################################
    def getDBSPaths(self):
        """
        list the DBSpaths for all required data
        """
        return self.dbspaths

    # #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock

        Not implemented yet: only prints a reminder and returns None.
        """
        # A single parenthesised string prints identically as a Python 2
        # print statement and as a Python 3 print() call.
        print("To be used by a more complex job splitting... TODO later... ")
        print("it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo")

    # #################################################
    def getFileBlocks(self):
        """
        fileblocks for all required dataset-owners
        """
        return self.allblocks
158    
159     ########################################################################
160    
161