ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/DataDiscovery.py
Revision: 1.1
Committed: Mon Nov 7 23:17:44 2005 UTC (19 years, 5 months ago) by afanfani
Content type: text/x-python
Branch: MAIN
Log Message:
class to perform data discovery based on DBS

File Contents

# User Rev Content
1 afanfani 1.1 #!/usr/bin/env python2
2     import sys, os, string, re
3     from DBSInfo import *
4    
5     # ####################################
class DataDiscoveryError(Exception):
    """Raised when the Data Discovery service cannot be accessed.

    Creating the exception prints an error banner to standard output,
    as the original class did, and also stores the same text as the
    exception message.
    """

    def __init__(self):
        message = '\nERROR accessing Data Discovery\n'
        # Derive from Exception (the original was a plain class) so the
        # class can be raised on interpreters that reject non-exception
        # classes and is caught by generic "except Exception" handlers.
        Exception.__init__(self, message)
        print(message)
10     # ####################################
class DatasetContentsError(Exception):
    """Raised when fetching the dataset contents from Data Discovery fails.

    Creating the exception prints an error banner to standard output,
    as the original class did, and also stores the same text as the
    exception message.
    """

    def __init__(self):
        message = '\nERROR accessing Data Discovery : getDatasetContents\n'
        # Derive from Exception (the original was a plain class) so the
        # class can be raised on interpreters that reject non-exception
        # classes and is caught by generic "except Exception" handlers.
        Exception.__init__(self, message)
        print(message)
15    
16     # ####################################
class DatasetProvenanceError(Exception):
    """Raised when fetching the dataset provenance from Data Discovery fails.

    Creating the exception prints an error banner to standard output,
    as the original class did, and also stores the same text as the
    exception message.
    """

    def __init__(self):
        message = '\nERROR accessing Data Discovery : getDatasetProvenance\n'
        # Derive from Exception (the original was a plain class) so the
        # class can be raised on interpreters that reject non-exception
        # classes and is caught by generic "except Exception" handlers.
        Exception.__init__(self, message)
        print(message)
21    
22     # ####################################
23     # class to find and extract info from published data
class DataDiscovery:
    """Find and extract information about published data through DBS.

    Typical use: build the object, call fetchDBSInfo() once to contact
    DBS, then read the results through the get* accessors.
    """

    def __init__(self, owner, dataset, dataTiers, cfg_params):
        """
        Store the request parameters and initialise the (empty) results.

        owner, dataset -- combined into the DBS dataset string
                          '<dataset>/datatier/<owner>'
        dataTiers      -- passed unchanged to DBSInfo for the primary dataset
        cfg_params     -- stored as-is; not used inside this class
        """
        # Attributes
        self.dbsdataset = dataset + '/datatier/' + owner
        self.dataTiers = dataTiers
        self.cfg_params = cfg_params

        # DBS output: list of dbspaths for all data
        self.dbspaths = []
        # DBS output: one list of fileblock names per dataset-owner
        # (parents first, primary last)
        self.allblocks = []
        # DBS output: map fileblocks-totevts for the primary block,
        # used internally to this class
        self.blocksinfo = {}
        # Legacy dataset-owner map returned by getDatasetOwnerPairs().
        # It must exist (the accessor used to raise AttributeError), but
        # fetchDBSInfo() records dbspaths instead and never fills it.
        self.dataset_owner = {}

    # ####################################
    def fetchDBSInfo(self):
        """
        Contact DBS.

        Fills self.blocksinfo with the contents of the primary dataset,
        self.dbspaths with the primary path followed by each parent path,
        and self.allblocks with the fileblock names of every parent
        followed by those of the primary dataset.

        Raises DataDiscoveryError when a DBS call fails.
        """
        # add the primary dbspath
        # it might be replaced if one get from DBSAPI the primary dbspath as well
        self.dbspaths.append("/" + self.dbsdataset)

        dbs = DBSInfo(self.dbsdataset, self.dataTiers)
        try:
            self.blocksinfo = dbs.getDatasetContents()
        # NOTE(review): DBSError is looked up on the DBSInfo *instance*;
        # confirm that attribute exists, otherwise this clause itself
        # fails with AttributeError while handling the error.
        except dbs.DBSError:
            raise DataDiscoveryError
        try:
            parents = dbs.getDatasetProvenance()
        except:
            # Bare except kept on purpose: DBS errors are presumably
            # classes that do not derive from Exception -- verify.
            raise DataDiscoveryError

        ## for each parent get the corresponding fileblocks
        for aparent in parents:
            ## instead of the map dataset-owner use the dbspaths
            parentdbsdataset = aparent.getDatasetPath()
            self.dbspaths.append(parentdbsdataset)
            ## get the fileblocks of the parents : FIXME remove the first / in the path
            # NOTE(review): [1:-1] drops the LAST character as well as the
            # leading '/'; confirm the format returned by getDatasetPath()
            # (if it has no trailing separator this should be [1:]).
            pdbs = DBSInfo(parentdbsdataset[1:-1], [])
            try:
                parentsblocksinfo = pdbs.getDatasetContents()
            except:
                raise DataDiscoveryError

            # add parent fileblocks (list() keeps a real list on Python 3)
            self.allblocks.append(list(parentsblocksinfo.keys()))

        ## all the required blocks: add also the primary fileblocks
        self.allblocks.append(list(self.blocksinfo.keys()))

    # #################################################
    def getMaxEvents(self):
        """
        max events of the primary dataset-owner

        Returns the sum of the per-fileblock event counts held in
        self.blocksinfo (0 when it is empty).
        """
        return sum(self.blocksinfo.values())

    # #################################################
    def getDatasetOwnerPairs(self):
        """
        list all required dataset-owner pairs

        Legacy accessor: returns the dataset-owner map, which this class
        no longer fills (it stays empty); use getDBSPaths() instead.
        """
        return self.dataset_owner

    # #################################################
    def getDBSPaths(self):
        """
        list the DBSpaths for all required data
        """
        return self.dbspaths

    # #################################################
    def getEVC(self):
        """
        list the event collections structure by fileblock

        Not implemented yet: only prints a reminder and returns None.
        """
        print("To be used by a more complex job splitting... TODO later... ")
        print("it requires changes in what's returned by DBSInfo.getDatasetContents and then fetchDBSInfo")

    # #################################################
    def getFileBlocks(self):
        """
        fileblocks for all required dataset-owners
        """
        return self.allblocks
119    
120     ########################################################################
121    
122