ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
(Generate patch)

Comparing COMP/CRAB/python/DataDiscovery.py (file contents):
Revision 1.3 by afanfani, Sun Jan 29 01:46:08 2006 UTC vs.
Revision 1.7.2.1 by spiga, Thu Jul 20 12:03:48 2006 UTC

# Line 1 | Line 1
1 < #!/usr/bin/env python2
1 > #!/usr/bin/env python
2   import sys, os, string, re
3   from DBSInfo import *
4  
5  
6   # ####################################
7   class DataDiscoveryError(exceptions.Exception):
8 <  def __init__(self, errorMessage):
9 <   args=errorMessage
10 <   exceptions.Exception.__init__(self, args)
11 <   pass
12 <
13 <  def getErrorMessage(self):
14 <   """ Return exception error """
15 <   return "%s" % (self.args)
8 >    def __init__(self, errorMessage):
9 >        args=errorMessage
10 >        exceptions.Exception.__init__(self, args)
11 >        pass
12 >
13 >    def getErrorMessage(self):
14 >        """ Return exception error """
15 >        return "%s" % (self.args)
16  
17   # ####################################
18   class NotExistingDatasetError(exceptions.Exception):
19 <  def __init__(self, errorMessage):
20 <   args=errorMessage
21 <   exceptions.Exception.__init__(self, args)
22 <   pass
23 <
24 <  def getErrorMessage(self):
25 <   """ Return exception error """
26 <   return "%s" % (self.args)
19 >    def __init__(self, errorMessage):
20 >        args=errorMessage
21 >        exceptions.Exception.__init__(self, args)
22 >        pass
23 >
24 >    def getErrorMessage(self):
25 >        """ Return exception error """
26 >        return "%s" % (self.args)
27  
28   # ####################################
29   class NoDataTierinProvenanceError(exceptions.Exception):
30 <  def __init__(self, errorMessage):
31 <   args=errorMessage
32 <   exceptions.Exception.__init__(self, args)
33 <   pass
34 <
35 <  def getErrorMessage(self):
36 <   """ Return exception error """
37 <   return "%s" % (self.args)
30 >    def __init__(self, errorMessage):
31 >        args=errorMessage
32 >        exceptions.Exception.__init__(self, args)
33 >        pass
34 >
35 >    def getErrorMessage(self):
36 >        """ Return exception error """
37 >        return "%s" % (self.args)
38  
39   # ####################################
40   # class to find and extract info from published data
# Line 42 | Line 42 | class DataDiscovery:
42      def __init__(self, owner, dataset, dataTiers, cfg_params):
43  
44   #       Attributes
45 <        self.dbsdataset='/'+dataset+'/datatier/'+owner
45 >        self.owner = owner
46 >        self.dataset = dataset
47          self.dataTiers = dataTiers
48          self.cfg_params = cfg_params
49  
# Line 56 | Line 57 | class DataDiscovery:
57          """
58          Contact DBS
59          """
59        parents = []
60        parentsblocksinfo = {}
60  
61          ## add the PU among the required data tiers if the Digi are requested
62          if (self.dataTiers.count('Digi')>0) & (self.dataTiers.count('PU')<=0) :
63 <          self.dataTiers.append('PU')
63 >            self.dataTiers.append('PU')
64  
65          ## get info about the requested dataset
66 <        dbs=DBSInfo(self.dbsdataset,self.dataTiers)
66 >        dbs=DBSInfo()
67          try:
68 <          self.blocksinfo=dbs.getDatasetContents()
68 >            self.datasets = dbs.getMatchingDatasets(self.owner, self.dataset)
69          except DBSError, ex:
70 <          raise DataDiscoveryError(ex.getErrorMessage())
70 >            raise DataDiscoveryError(ex.getErrorMessage())
71 >        if len(self.datasets) == 0:
72 >            raise DataDiscoveryError("Owner=%s, Dataset=%s unknown to DBS" % (self.owner, self.dataset))
73 >        if len(self.datasets) > 1:
74 >            raise DataDiscoveryError("Owner=%s, Dataset=%s is ambiguous" % (self.owner, self.dataset))
75 >        try:
76 >            self.dbsdataset = self.datasets[0].get('datasetPathName')
77 >            self.blocksinfo = dbs.getDatasetContents(self.dbsdataset)
78 >            self.allblocks.append (self.blocksinfo.keys ()) # add also the current fileblocksinfo
79 >            self.dbspaths.append(self.dbsdataset)
80 >        except DBSError, ex:
81 >            raise DataDiscoveryError(ex.getErrorMessage())
82          
83          if len(self.blocksinfo)<=0:
84 <         msg="\nERROR Data %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !"%self.dbsdataset
85 <         raise NotExistingDatasetError(msg)
84 >            msg="\nERROR Data for %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !"%self.dbsdataset
85 >            raise NotExistingDatasetError(msg)
86  
77        currentdatatier=string.split(self.blocksinfo.keys()[0],'/')[2]
78        fakedatatier=string.split(self.dbsdataset,'/')[2]
79        currentdbsdataset=string.replace(self.dbsdataset, fakedatatier, currentdatatier)  
80
81        self.dbspaths.append(currentdbsdataset)    # add the requested dbspath
87  
88          ## get info about the parents
89          try:
90 <          parents=dbs.getDatasetProvenance()
90 >            parents=dbs.getDatasetProvenance(self.dbsdataset, self.dataTiers)
91          except DBSInvalidDataTierError, ex:
92 <          msg=ex.getErrorMessage()+' \n Check the data_tier variable in crab.cfg !\n'
93 <          raise DataDiscoveryError(msg)
92 >            msg=ex.getErrorMessage()+' \n Check the data_tier variable in crab.cfg !\n'
93 >            raise DataDiscoveryError(msg)
94          except DBSError, ex:
95 <          raise DataDiscoveryError(ex.getErrorMessage())
95 >            raise DataDiscoveryError(ex.getErrorMessage())
96  
97          ## check that the user asks for parent Data Tier really existing in the DBS provenance
98 <        self.checkParentDataTier(parents, self.dataTiers, currentdbsdataset)
98 >        self.checkParentDataTier(parents, self.dataTiers)
99  
100          ## for each parent get the corresponding fileblocks
101 <        for aparent in parents:
102 <           ## fill a list of dbspaths
103 <           parentdbsdataset=aparent.getDatasetPath()
104 <           self.dbspaths.append(parentdbsdataset)
105 <           pdbs=DBSInfo(parentdbsdataset,[])
106 <           try:
107 <             parentsblocksinfo=pdbs.getDatasetContents()
108 <           except DBSError, ex:
101 >        try:
102 >            for p in parents:
103 >                ## fill a list of dbspaths
104 >                parentPath = p.get('parent').get('datasetPathName')
105 >                self.dbspaths.append (parentPath)
106 >                parentBlocks = dbs.getDatasetContents (parentPath)
107 >                self.allblocks.append (parentBlocks.keys ())  # add parent fileblocksinfo
108 >        except DBSError, ex:
109              raise DataDiscoveryError(ex.getErrorMessage())
110  
106           self.allblocks.append(parentsblocksinfo.keys()) # add parent fileblocksinfo
107
108        ## all the required blocks
109        self.allblocks.append(self.blocksinfo.keys()) # add also the current fileblocksinfo
110
111
111   # #################################################
112 <    def checkParentDataTier(self, parents, user_datatiers, currentdbsdataset ):
112 >    def checkParentDataTier(self, parents, dataTiers):
113          """
114 <         check that the data tiers requested by the user really exists in the provenance of the given dataset
114 >        check that the data tiers requested by the user really exists in the provenance of the given dataset
115          """
116 <
117 <        current_datatier=string.split(currentdbsdataset,'/')[2]
118 <
119 <        parent_datatypes=[]
120 <        for aparent in parents:
121 <          parent_datatypes.append(aparent.getDataType())
122 <
124 <        for datatier in user_datatiers:
125 <          if parent_datatypes.count(datatier)<=0:
126 <             # the current datatier is not supposed to be in the provenance
127 <             if not (datatier == current_datatier):  
128 <              msg="\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n  Check the data_tier variable in crab.cfg !"%(currentdbsdataset,datatier)
129 <              raise  NoDataTierinProvenanceError(msg)
116 >        startType = string.split(self.dbsdataset,'/')[2]
117 >        # for example 'type' is PU and 'dataTier' is Hit
118 >        parentTypes = map(lambda p: p.get('type'), parents)
119 >        for tier in dataTiers:
120 >            if parentTypes.count(tier) <= 0 and tier != startType:
121 >                msg="\nERROR Data %s not published in DBS with asked data tiers : the data tier not found is %s !\n  Check the data_tier variable in crab.cfg !"%(self.dbsdataset,tier)
122 >                raise  NoDataTierinProvenanceError(msg)
123  
124  
125   # #################################################
126      def getMaxEvents(self):
127          """
128 <         max events of the primary dataset-owner
128 >        max events of the primary dataset-owner
129          """
130          ## loop over the fileblocks of the primary dataset-owner
131          nevts=0      
132          for blockevts in self.blocksinfo.values():
133 <          nevts=nevts+blockevts
133 >            nevts=nevts+blockevts
134  
135          return nevts
136  
137   # #################################################
138      def getDBSPaths(self):
139          """
140 <         list the DBSpaths for all required data
140 >        list the DBSpaths for all required data
141          """
142          return self.dbspaths
143  
144   # #################################################
145      def getEVC(self):
146          """
147 <         list the event collections structure by fileblock
147 >        list the event collections structure by fileblock
148          """
149          print "To be used by a more complex job splitting... TODO later... "
150          print "it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo"
# Line 159 | Line 152 | class DataDiscovery:
152   # #################################################
153      def getFileBlocks(self):
154          """
155 <         fileblocks for all required dataset-owners
155 >        fileblocks for all required dataset-owners
156          """
157          return self.allblocks        
158  
159   ########################################################################
167
168

Diff Legend

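(no marker) Removed lines (present only in the old revision, 1.3)
+ Added lines
< Changed lines (text as in the old revision, 1.3)
> Changed lines (text as in the new revision, 1.7.2.1)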