ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
(Generate patch)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.1 by afanfani, Mon Nov 7 23:17:44 2005 UTC vs.
Revision 1.7 by slacapra, Thu Jul 6 10:22:47 2006 UTC

# Line 1 | Line 1
1 < #!/usr/bin/env python2
1 > #!/usr/bin/env python
2   import sys, os, string, re
3   from DBSInfo import *
4  
5 +
6   # ####################################
7 < class DataDiscoveryError:
8 <    def __init__(self):
9 <        print '\nERROR accessing Data Discovery\n'
7 > class DataDiscoveryError(exceptions.Exception):
8 >    def __init__(self, errorMessage):
9 >        args=errorMessage
10 >        exceptions.Exception.__init__(self, args)
11          pass
12 +
13 +    def getErrorMessage(self):
14 +        """ Return exception error """
15 +        return "%s" % (self.args)
16 +
17   # ####################################
18 < class DatasetContentsError:
19 <    def __init__(self):
20 <        print '\nERROR accessing Data Discovery : getDatasetContents\n'
18 > class NotExistingDatasetError(exceptions.Exception):
19 >    def __init__(self, errorMessage):
20 >        args=errorMessage
21 >        exceptions.Exception.__init__(self, args)
22          pass
23  
24 +    def getErrorMessage(self):
25 +        """ Return exception error """
26 +        return "%s" % (self.args)
27 +
28   # ####################################
29 < class DatasetProvenanceError:
30 <    def __init__(self):
31 <        print '\nERROR accessing Data Discovery : getDatasetProvenance\n'
29 > class NoDataTierinProvenanceError(exceptions.Exception):
30 >    def __init__(self, errorMessage):
31 >        args=errorMessage
32 >        exceptions.Exception.__init__(self, args)
33          pass
34  
35 +    def getErrorMessage(self):
36 +        """ Return exception error """
37 +        return "%s" % (self.args)
38 +
39   # ####################################
40   # class to find and extract info from published data
41   class DataDiscovery:
42      def __init__(self, owner, dataset, dataTiers, cfg_params):
43  
44   #       Attributes
45 <        self.dbsdataset=dataset+'/datatier/'+owner
45 >        self.owner = owner
46 >        self.dataset = dataset
47          self.dataTiers = dataTiers
48          self.cfg_params = cfg_params
49  
50 <        self.dbspaths= []  # DBS output: list of dbspaths for all data
50 >        self.dbspaths= []     # DBS output: list of dbspaths for all data
51          self.allblocks = []   # DBS output: list of map fileblocks-totevts for all dataset-owners
52 <        self.blocksinfo = {}     # DBS output: map fileblocks-totevts for the primary block, used internally to this class
52 >        self.blocksinfo = {}  # DBS output: map fileblocks-totevts for the primary block, used internally to this class
53   #DBS output: max events computed by method getMaxEvents
54  
55   # ####################################
# Line 39 | Line 57 | class DataDiscovery:
57          """
58          Contact DBS
59          """
42        parents = []
43        parentsblocksinfo = {}
44        self.dbspaths.append("/"+self.dbsdataset) # add the primary dbspath
45                                                  # it might be replaced if one get from DBSAPI the primary dbspath as well
60  
61 <        dbs=DBSInfo(self.dbsdataset,self.dataTiers)
61 >        ## add the PU among the required data tiers if the Digi are requested
62 >        if (self.dataTiers.count('Digi')>0) & (self.dataTiers.count('PU')<=0) :
63 >            self.dataTiers.append('PU')
64 >
65 >        ## get info about the requested dataset
66 >        dbs=DBSInfo()
67          try:
68 <          self.blocksinfo=dbs.getDatasetContents()
69 <        except dbs.DBSError:
70 <          raise DataDiscoveryError
68 >            self.datasets = dbs.getMatchingDatasets(self.owner, self.dataset)
69 >        except DBSError, ex:
70 >            raise DataDiscoveryError(ex.getErrorMessage())
71 >        if len(self.datasets) == 0:
72 >            raise DataDiscoveryError("Owner=%s, Dataset=%s unknown to DBS" % (self.owner, self.dataset))
73 >        if len(self.datasets) > 1:
74 >            raise DataDiscoveryError("Owner=%s, Dataset=%s is ambiguous" % (self.owner, self.dataset))
75          try:
76 <          parents=dbs.getDatasetProvenance()
77 <        except:
78 <          raise DataDiscoveryError
76 >            self.dbsdataset = self.datasets[0].get('datasetPathName')
77 >            self.blocksinfo = dbs.getDatasetContents(self.dbsdataset)
78 >            self.allblocks.append (self.blocksinfo.keys ()) # add also the current fileblocksinfo
79 >            self.dbspaths.append(self.dbsdataset)
80 >        except DBSError, ex:
81 >            raise DataDiscoveryError(ex.getErrorMessage())
82 >        
83 >        if len(self.blocksinfo)<=0:
84 >            msg="\nERROR Data for %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !"%self.dbsdataset
85 >            raise NotExistingDatasetError(msg)
86  
57        ## for each parent get the corresponding fileblocks
58        for aparent in parents:
59           ## fill the map dataset-owner for the parents
60           #pdataset=string.split(aparent,'/')[1]
61           #powner=string.split(aparent,'/')[3]
62           #self.dataset_owner[powner]=pdataset
63           ## instead of the map dataset-owner use the dbspaths  
64           parentdbsdataset=aparent.getDatasetPath()
65           self.dbspaths.append(parentdbsdataset)
66           #self.dbspaths.append(aparent)
67           ## get the fileblocks of the parents : FIXME remove the first / in the path
68           pdbs=DBSInfo(parentdbsdataset[1:-1],[])
69           try:
70             parentsblocksinfo=pdbs.getDatasetContents()
71           except:
72            raise DataDiscoveryError
87  
88 <           self.allblocks.append(parentsblocksinfo.keys()) # add parent fileblocksinfo
88 >        ## get info about the parents
89 >        try:
90 >            parents=dbs.getDatasetProvenance(self.dbsdataset, self.dataTiers)
91 >        except DBSInvalidDataTierError, ex:
92 >            msg=ex.getErrorMessage()+' \n Check the data_tier variable in crab.cfg !\n'
93 >            raise DataDiscoveryError(msg)
94 >        except DBSError, ex:
95 >            raise DataDiscoveryError(ex.getErrorMessage())
96 >
97 >        ## check that the parent Data Tiers the user asks for really exist in the DBS provenance
98 >        self.checkParentDataTier(parents, self.dataTiers)
99 >
100 >        ## for each parent get the corresponding fileblocks
101 >        try:
102 >            for p in parents:
103 >                ## fill a list of dbspaths
104 >                parentPath = p.get('parent').get('datasetPathName')
105 >                self.dbspaths.append (parentPath)
106 >                parentBlocks = dbs.getDatasetContents (parentPath)
107 >                self.allblocks.append (parentBlocks.keys ())  # add parent fileblocksinfo
108 >            except DBSError, ex:
109 >                raise DataDiscoveryError(ex.getErrorMessage())
110  
111 <        ## all the required blocks
112 <        self.allblocks.append(self.blocksinfo.keys()) # add also the primary fileblocksinfo
111 > # #################################################
112 >    def checkParentDataTier(self, parents, dataTiers):
113 >        """
114 >        check that the data tiers requested by the user really exist in the provenance of the given dataset
115 >        """
116 >        startType = string.split(self.dbsdataset,'/')[2]
117 >        # for example 'type' is PU and 'dataTier' is Hit
118 >        parentTypes = map(lambda p: p.get('type'), parents)
119 >        for tier in dataTiers:
120 >            if parentTypes.count(tier) <= 0 and tier != startType:
121 >                msg="\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n  Check the data_tier variable in crab.cfg !"%(self.dbsdataset,tier)
122 >                raise  NoDataTierinProvenanceError(msg)
123  
124  
125   # #################################################
126      def getMaxEvents(self):
127          """
128 <         max events of the primary dataset-owner
128 >        max events of the primary dataset-owner
129          """
130          ## loop over the fileblocks of the primary dataset-owner
131          nevts=0      
132          for blockevts in self.blocksinfo.values():
133 <          nevts=nevts+blockevts
133 >            nevts=nevts+blockevts
134  
135          return nevts
136  
137   # #################################################
93    def getDatasetOwnerPairs(self):
94        """
95         list all required dataset-owner pairs
96        """
97        return self.dataset_owner
98 # #################################################
138      def getDBSPaths(self):
139          """
140 <         list the DBSpaths for all required data
140 >        list the DBSpaths for all required data
141          """
142          return self.dbspaths
143  
144   # #################################################
145      def getEVC(self):
146          """
147 <         list the event collections structure by fileblock
147 >        list the event collections structure by fileblock
148          """
149          print "To be used by a more complex job splitting... TODO later... "
150          print "it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo"
# Line 113 | Line 152 | class DataDiscovery:
152   # #################################################
153      def getFileBlocks(self):
154          """
155 <         fileblocks for all required dataset-owners
155 >        fileblocks for all required dataset-owners
156          """
157          return self.allblocks        
158  
159   ########################################################################
121
122

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines