1 |
#!/usr/bin/env python
|
2 |
#
|
3 |
# Given a dataset, a total number of events and a number of events per job,
|
4 |
# it generates a pattern file for the Job Robot
|
5 |
# Usage: jr_dataset -d <dataset> -t <totevts> -j <evtsperjob>
|
6 |
#
|
7 |
import sys
|
8 |
import re
|
9 |
import getopt
|
10 |
import urllib
|
11 |
from HTMLParser import HTMLParser
|
12 |
|
13 |
# Parsing class
|
14 |
class MyHTMLParser(HTMLParser):
    """HTML parser that prints one pattern line per matching text node.

    The printed line is built from the module-level names ``dataset``,
    ``totalevts`` and ``evtsjob``, which must be defined before any data
    is fed to the parser.
    """

    def handle_data(self, data):
        """Print a pattern line when ``data`` holds a '.<text>.' sequence.

        Text nodes without two dots separated by at least one character
        are ignored (presumably everything that is not a dotted site
        name -- confirm against the page layout).
        """
        if re.search(r'\..+\.', data) is None:
            return
        fields = (dataset, data.strip(), totalevts, evtsjob)
        # A single parenthesised argument prints the same text whether
        # ``print`` is the statement or the function.
        print('%s %s %s %s' % fields)
|
19 |
|
20 |
# Main program
|
21 |
# Parse the command line; -d, -t and -j each take a value and all three
# are required.
try:
    opts, args = getopt.getopt(sys.argv[1:], "d:t:j:")
except getopt.GetoptError:
    sys.stderr.write('Wrong parameters\n')
    sys.exit(1)
dataset = None
totalevts = None
evtsjob = None
for o, a in opts:
    if o == '-d':
        dataset = a
    elif o == '-t':
        totalevts = a
    elif o == '-j':
        evtsjob = a
if not dataset or not totalevts or not evtsjob:
    sys.stderr.write('Some arguments not defined\n')
    sys.exit(1)

# Load HTML page
# The dataset is percent-encoded ('/' is left as is) so that characters
# such as '&', '#', '+' or spaces cannot break or truncate the query.
url = ('https://cmsweb.cern.ch/dbs_discovery/aSearchShowAll?case=on&cff=0&caseSensitive=on&userInput=find%20site%20where%20dataset%3D'
       + urllib.quote(dataset)
       + '&grid=0&fromRow=0&xml=0&sortName=&dbsInst=cms_dbs_prod_global&html=1&limit=-1&sortOrder=desc&output=site&tabCol=&userMode=user&method=dbsapi')
try:
    f = urllib.urlopen(url)
except IOError:
    sys.stderr.write("Could not connect\n")
    sys.exit(1)
p = MyHTMLParser()

# Parse HTML
# The page is fed line by line; the parser prints a pattern line for each
# matching text node. The response is closed even if parsing fails.
try:
    for line in f.readlines():
        p.feed(line)
    p.close()
finally:
    f.close()
|