ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/DBS/Clients/CRABStandalone/DataDiscovery.py
(Generate patch)

Comparing COMP/DBS/Clients/CRABStandalone/DataDiscovery.py (file contents):
Revision 1.1 by afanfani, Thu Oct 27 15:26:24 2005 UTC vs.
Revision 1.4 by afanfani, Sat Jan 28 23:09:20 2006 UTC

# Line 1 | Line 1
1   #!/usr/bin/env python2
2   import sys, os, string, re
3 import urllib
4 import urllib2
3   from DBSInfo import *
4  
5 +
6 + # ####################################
7 + class DataDiscoveryError(exceptions.Exception):
8 +  def __init__(self, errorMessage):
9 +   args=errorMessage
10 +   exceptions.Exception.__init__(self, args)
11 +   pass
12 +
13 +  def getErrorMessage(self):
14 +   """ Return exception error """
15 +   return "%s" % (self.args)
16 +
17 + # ####################################
18 + class NotExistingDatasetError(exceptions.Exception):
19 +  def __init__(self, errorMessage):
20 +   args=errorMessage
21 +   exceptions.Exception.__init__(self, args)
22 +   pass
23 +
24 +  def getErrorMessage(self):
25 +   """ Return exception error """
26 +   return "%s" % (self.args)
27 +
28   # ####################################
29 < class DataDiscoveryError:
30 <    def __init__(self):
31 <        print '\nERROR accessing Data Discovery\n'
32 <        pass
29 > class NoDataTierinProvenanceError(exceptions.Exception):
30 >  def __init__(self, errorMessage):
31 >   args=errorMessage
32 >   exceptions.Exception.__init__(self, args)
33 >   pass
34 >
35 >  def getErrorMessage(self):
36 >   """ Return exception error """
37 >   return "%s" % (self.args)
38  
39   # ####################################
40   # class to find and extract info from published data
# Line 16 | Line 42 | class DataDiscovery:
42      def __init__(self, owner, dataset, dataTiers, cfg_params):
43  
44   #       Attributes
45 <        self.owner = owner
20 <        self.dataset = dataset
45 >        self.dbsdataset='/'+dataset+'/datatier/'+owner
46          self.dataTiers = dataTiers
47          self.cfg_params = cfg_params
48  
49 <        self.dataset_owner = {}  # DBS output: map dataset-owner for all data
49 >        self.dbspaths= []     # DBS output: list of dbspaths for all data
50          self.allblocks = []   # DBS output: list of map fileblocks-totevts for all dataset-owners
51 <        self.blocksinfo = {}     # DBS output: map fileblocks-totevts for the primary block, used internally to this class
51 >        self.blocksinfo = {}  # DBS output: map fileblocks-totevts for the primary block, used internally to this class
52   #DBS output: max events computed by method getMaxEvents
53  
54   # ####################################
# Line 31 | Line 56 | class DataDiscovery:
56          """
57          Contact DBS
58          """
59 <        parents = {}
59 >        parents = []
60          parentsblocksinfo = {}
36        self.dataset_owner[self.owner]=self.dataset  # add the map dataset-owner
61  
62 <        dbs=DBSInfo(self.owner,self.dataset,self.dataTiers)
62 >        ## add the PU among the required data tiers if the Digi are requested
63 >        if (self.dataTiers.count('Digi')>0) & (self.dataTiers.count('PU')<=0) :
64 >          self.dataTiers.append('PU')
65 >
66 >        ## get info about the requested dataset
67 >        dbs=DBSInfo(self.dbsdataset,self.dataTiers)
68          try:
69            self.blocksinfo=dbs.getDatasetContents()
70 <        except:
71 <          raise DataDiscoveryError
70 >        except DBSError, ex:
71 >          raise DataDiscoveryError(ex.getErrorMessage())
72 >        
73 >        if len(self.blocksinfo)<=0:
74 >         msg="\nData %s do not exist in DBS! \n Check the dataset/owner variables in crab.cfg !"%self.dbsdataset
75 >         raise NotExistingDatasetError(msg)
76 >
77 >        currentdatatier=string.split(self.blocksinfo.keys()[0],'/')[2]
78 >        fakedatatier=string.split(self.dbsdataset,'/')[2]
79 >        currentdbsdataset=string.replace(self.dbsdataset, fakedatatier, currentdatatier)  
80 >
81 >        self.dbspaths.append(currentdbsdataset)    # add the requested dbspath
82  
83 +        ## get info about the parents
84          try:
85            parents=dbs.getDatasetProvenance()
86 <        except:
87 <          raise DataDiscoveryError
86 >        except DBSInvalidDataTierError, ex:
87 >          msg=ex.getErrorMessage()+' \n Check the data_tier variable in crab.cfg !\n'
88 >          raise DataDiscoveryError(msg)
89 >        except DBSError, ex:
90 >          raise DataDiscoveryError(ex.getErrorMessage())
91 >
92 >        ## check that the parent data tiers requested by the user really exist in the DBS provenance
93 >        self.checkParentDataTier(parents, self.dataTiers, currentdbsdataset)
94  
95          ## for each parent get the corresponding fileblocks
96          for aparent in parents:
97 <           ## fill the map dataset-owner for the parents
98 <           pdataset=string.split(aparent,'/')[1]
99 <           powner=string.split(aparent,'/')[3]
100 <           self.dataset_owner[powner]=pdataset
101 <           ## get the fileblocks of the parents
102 <           pdbs=DBSInfo(powner,pdataset,[])
103 <           parentsblocksinfo=pdbs.getDatasetContents()
97 >           ## fill a list of dbspaths
98 >           parentdbsdataset=aparent.getDatasetPath()
99 >           self.dbspaths.append(parentdbsdataset)
100 >           pdbs=DBSInfo(parentdbsdataset,[])
101 >           try:
102 >             parentsblocksinfo=pdbs.getDatasetContents()
103 >           except DBSError, ex:
104 >            raise DataDiscoveryError(ex.getErrorMessage())
105 >
106             self.allblocks.append(parentsblocksinfo.keys()) # add parent fileblocksinfo
107  
108          ## all the required blocks
109 <        self.allblocks.append(self.blocksinfo.keys()) # add also the primary fileblocksinfo
109 >        self.allblocks.append(self.blocksinfo.keys()) # add also the current fileblocksinfo
110 >
111 >
112 > # #################################################
113 >    def checkParentDataTier(self, parents, user_datatiers, currentdbsdataset ):
114 >        """
115 >         check that the data tiers requested by the user really exist in the provenance of the given dataset
116 >        """
117 >
118 >        current_datatier=string.split(currentdbsdataset,'/')[2]
119 >
120 >        parent_datatypes=[]
121 >        for aparent in parents:
122 >          parent_datatypes.append(aparent.getDataType())
123 >
124 >        for datatier in user_datatiers:
125 >          if parent_datatypes.count(datatier)<=0:
126 >             # the current datatier is not supposed to be in the provenance
127 >             if not (datatier == current_datatier):  
128 >              msg="\nData %s not published in DBS with asked data tiers : the data tier not found is %s !\n  Check the data_tier variable in crab.cfg !"%(currentdbsdataset,datatier)
129 >              raise  NoDataTierinProvenanceError(msg)
130  
131  
132   # #################################################
# Line 74 | Line 142 | class DataDiscovery:
142          return nevts
143  
144   # #################################################
145 <    def getDatasetOwnerPairs(self):
145 >    def getDBSPaths(self):
146          """
147 <         list all required dataset-owner pairs
147 >         list the DBSpaths for all required data
148          """
149 <        return self.dataset_owner
149 >        return self.dbspaths
150  
151   # #################################################
152      def getEVC(self):

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines