1 |
asciaba |
1.1 |
#!/usr/bin/env python
|
2 |
|
|
#
|
3 |
|
|
# Given a dataset, a total number of events and a number of events per job,
|
4 |
|
|
# it generates a pattern file for the Job Robot
|
5 |
|
|
# Usage: jr_dataset -d <dataset> -t <totevts> -j <evtsperjob>
|
6 |
|
|
#
|
7 |
|
|
import sys
|
8 |
|
|
import re
|
9 |
|
|
import getopt
|
10 |
|
|
import urllib
|
11 |
|
|
from HTMLParser import HTMLParser
|
12 |
|
|
|
13 |
|
|
# Parsing class
|
14 |
|
|
class MyHTMLParser(HTMLParser):
    """HTML parser that prints one pattern line per matching text node.

    Every text node whose content contains a dot, at least one further
    character and another dot (presumably a fully-qualified site/host
    name such as ``a.b.c`` -- confirm against the page being parsed) is
    written to standard output as::

        <dataset> <text> <totalevts> <evtsjob>

    ``dataset``, ``totalevts`` and ``evtsjob`` are read from module-level
    globals, so they must be defined before any data is fed to the parser.
    """

    # Compiled once for the class instead of on every handle_data() call.
    _DOTTED_NAME = re.compile(r'\..+\.')

    def handle_data(self, data):
        """Print a pattern line if *data* looks like a dotted name.

        data -- the text found between two HTML tags; surrounding
                whitespace is stripped before printing.
        """
        if self._DOTTED_NAME.search(data):
            # A single parenthesised expression prints identically under
            # the Python 2 print statement and the Python 3 print function.
            print('%s %s %s %s' % (dataset, data.strip(), totalevts, evtsjob))
|
19 |
asciaba |
1.1 |
|
20 |
|
|
# Main program
|
21 |
|
|
# Read the command line: -d <dataset> -t <totevts> -j <evtsperjob>
try:
    opts, args = getopt.getopt(sys.argv[1:], "d:t:j:")
except getopt.GetoptError:
    sys.stderr.write('Wrong parameters\n')
    sys.exit(1)

# Building a dict keeps the last value when a flag is repeated, exactly
# as successive assignments in a loop would.
given = dict(opts)
dataset = given.get('-d')
totalevts = given.get('-t')
evtsjob = given.get('-j')

# All three options are mandatory and must be non-empty.
if not (dataset and totalevts and evtsjob):
    sys.stderr.write('Some arguments not defined\n')
    sys.exit(1)
|
39 |
|
|
|
40 |
|
|
# Load HTML page
|
41 |
|
|
# Build the DBS discovery query.  The rest of the userInput value is
# already percent-encoded, so the dataset is encoded as well (slashes are
# left untouched by urllib.quote's default `safe`); characters such as
# '&', '#' or spaces would otherwise corrupt the query string.
url = (
    'https://cmsweb.cern.ch/dbs_discovery/aSearchShowAll'
    '?case=on&cff=0&caseSensitive=on'
    '&userInput=find%20site%20where%20dataset%3D' + urllib.quote(dataset) +
    '&grid=0&fromRow=0&xml=0&sortName=&dbsInst=cms_dbs_prod_global'
    '&html=1&limit=-1&sortOrder=desc&output=site&tabCol='
    '&userMode=user&method=dbsapi'
)
try:
    f = urllib.urlopen(url)
except IOError:
    sys.stderr.write("Could not connect\n")
    sys.exit(1)
p = MyHTMLParser()

# Parse HTML
try:
    for line in f.readlines():
        p.feed(line)
    p.close()
finally:
    # Release the connection even if the parser raises on malformed HTML.
    f.close()
|