VHbb/python/submitThem.py

#! /usr/bin/env python
from optparse import OptionParser
import sys
import time
import os
import shutil

# Command line interface of the submission script.
# Each entry is (short flag, long flag, destination attribute, default, help text).
_OPTION_SPECS = [
    ("-T", "--tag", "tag", "8TeV",
     "Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis"),
    ("-J", "--task", "task", "",
     "Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). "),
    ("-M", "--mass", "mass", "125",
     "Mass for DC or Plots, 110...135"),
    ("-S", "--samples", "samples", "",
     "samples you want to run on"),
    ("-F", "--folderTag", "ftag", "",
     "Creats a new folder structure for outputs or uses an existing one with the given name"),
    ("-N", "--number-of-events", "nevents_split", 100000,
     "Number of events per file when splitting."),
    ("-P", "--philipp-love-progress-bars", "philipp_love_progress_bars", False,
     "If you share the love of Philipp..."),
]

parser = OptionParser()
for _short_flag, _long_flag, _dest, _default, _help in _OPTION_SPECS:
    # No action/type is given, so every option takes one string argument
    # and falls back to the default above when it is omitted.
    parser.add_option(_short_flag, _long_flag, dest=_dest, default=_default,
                      help=_help)

# The full argv (program name included) is parsed; positional leftovers land in args.
(opts, args) = parser.parse_args(sys.argv)

import os,shutil,pickle,subprocess,ROOT,re
ROOT.gROOT.SetBatch(True)
from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
import getpass

# Both the tag and the task are mandatory: stop early with a usage hint.
if opts.tag == "":
    print("Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis.")
    sys.exit(123)

if opts.task == "":
    print("Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards")
    sys.exit(123)


en = opts.tag

#create the list with the samples to run over
samplesList=opts.samples.split(",")
# timestamp without blanks/colons so it can be embedded in file names
timestamp = time.asctime().replace(' ','_').replace(':','-')

# the list of the config is taken from the path config
pathconfig = BetterConfigParser()
pathconfig.read('%sconfig/paths'%(en))
_configs = pathconfig.get('Configuration','List').split(" ")
configs = [ '%sconfig/'%(en) + c for c in _configs  ]

if not opts.ftag == '':
    # -F given: route logs, plots and limits into <tagDir>/<ftag>/...
    tagDir = pathconfig.get('Directories','tagDir')
    DirStruct={
        'tagDir':tagDir,
        'ftagdir':'%s/%s/'%(tagDir,opts.ftag),
        'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),
        'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),
        'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),
        'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config'),
    }

    # Explicit order: parents are listed before their children, since
    # os.mkdir does not create intermediate directories.
    for key in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
        if not os.path.exists(DirStruct[key]):
            os.mkdir(DirStruct[key])

    # Rewrite the paths config so that it points to the new folders;
    # the previous version is kept as 'paths.bkp'.
    paths_file = '%sconfig/paths'%(en)
    with open(paths_file) as pathfile:
        path_lines = pathfile.readlines()
    os.rename(paths_file,'%sconfig/paths.bkp'%(en))
    with open(paths_file,'w') as pathfile:
        for line in path_lines:
            if line.startswith('plotpath'):
                line = 'plotpath: %s\n'%DirStruct['plotpath']
            elif line.startswith('logpath'):
                line = 'logpath: %s\n'%DirStruct['logpath']
            elif line.startswith('limits'):
                line = 'limits: %s\n'%DirStruct['limitpath']
            pathfile.write(line)

    #copy config files
    for item in configs:
        # Every entry is '<tag>config/<name>': drop exactly the leading tag.
        # (str.strip(en) would treat the tag as a set of characters and
        # could also eat matching characters at the end of the file name.)
        shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item[len(en):]))


print(configs)
# Read the full configuration (after the paths file may have been rewritten).
config = BetterConfigParser()
config.read(configs)

def dump_config(configs,output_file):
    """
    Dump all the configs in a output file
    The content of every readable config file is appended, in the given
    order, to output_file (which is created or truncated). A file that
    cannot be opened or read is skipped with a warning on stdout.
    Args:
        configs: list of files (string) to be dumped
        output_file: the file where the log will be dumped
    Returns:
        nothing
    """
    # 'with' guarantees that both the dump and every config file are
    # closed, also when something goes wrong half way through.
    with open(output_file,'w') as outf:
        for i in configs:
            try:
                with open(i,'r') as f:
                    outf.write(f.read())
            except IOError:
                print('@WARNING: Config' + i + ' not found. It will not be used.')

def compile_macro(config,macro):
    """
    Creates the library from a macro, compiling it with ROOT's '.L <file>+'
    command in the user's scratch directory to avoid problems with the
    linking in the working nodes. Nothing is compiled when the library
    file already exists.
    Args:
        config: configuration object; config.get(macro,'library') must
                return the path of the library to be produced
        macro: macro name to be compiled (the header <macro>.h is expected
               in the directory of the library)
    Returns:
        nothing
    """
    submitDir = os.getcwd()
    _macro=macro+'.h'
    library = config.get(macro,'library')
    libDir=os.path.dirname(library)
    try:
        os.chdir(libDir)
        if not os.path.exists(library):
            print('@INFO: Compiling ' + _macro)
            scratchDir='/scratch/%s/'%(getpass.getuser())
            # compile a copy of the header in scratch ...
            shutil.copyfile(libDir+'/'+_macro,scratchDir+_macro)
            os.chdir(scratchDir)
            ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
            # ... and bring the resulting library back next to the header
            shutil.copyfile(scratchDir+os.path.basename(library),library)
    finally:
        # always return to the submission directory, also when the copy
        # or the compilation fails
        os.chdir(submitDir)
        
# Make sure both helper libraries exist before any job is submitted.
for _macro_name in ('BTagReshaping', 'VHbbNameSpace'):
    compile_macro(config, _macro_name)

logPath = config.get("Directories", "logpath")
# Lines of submit.txt are used as job names in progress-bar mode (see submit).
_logo_path = '%s/data/submit.txt' % config.get('Directories', 'vhbbpath')
logo = open(_logo_path).readlines()
# Number of jobs submitted so far.
counter = 0

# The log directory must already exist: abort the submission otherwise.
if not os.path.isdir(logPath):
    for _message in ('@ERROR : ' + logPath + ': dir not found.',
                     '@ERROR : Create it before submitting ',
                     'Exit'):
        print(_message)
    sys.exit(-1)

# Substitution values shared by all submissions; submit() fills in 'job' and 'name'.
repDict = dict(
    en=en,
    logpath=logPath,
    job='',
    task=opts.task,
    queue='all.q',
    timestamp=timestamp,
    additional='',
    job_id='',
)
def submit(job,repDict):
    """
    Submit one job to the batch system through qsub and dump the
    configuration used for it next to the job logs.
    Args:
        job: name of the job; passed as first argument to runAll.sh
        repDict: dictionary with the substitution values of the qsub
                 command (en, logpath, task, queue, timestamp, job_id,
                 additional). It is modified in place: 'job' and 'name'
                 are set on every call.
    Returns:
        nothing
    """
    global counter
    repDict['job'] = job
    if opts.philipp_love_progress_bars:
        # cycle through the lines of the logo file to name the jobs;
        # the index is only computed here so that an empty logo file
        # does not break a normal submission
        nJob = counter % len(logo)
        repDict['name'] = '"%s"' %logo[nJob].strip()
    else:
        repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
    counter += 1
    # job_id may be a number (default of the -N option): convert explicitly,
    # otherwise the string concatenation raises a TypeError
    command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + str(repDict['job_id']) + ' ' + str(repDict['additional'])
    print(command)
    dump_config(configs,"%(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.config" %(repDict))
    # the command is a single shell line, hence shell=True
    subprocess.call(command, shell=True)

# MVA training: one job per entry of the training list.
if opts.task == 'train':
    train_list = config.get('MVALists', 'List_for_submitscript').split(',')
    print(train_list)
    for train_item in train_list:
        submit(train_item, repDict)


# Lists needed later on by the 'dc' and 'plot' tasks.
if opts.task == 'dc':
    DC_vars = config.get('LimitGeneral', 'List').split(',')
    print(DC_vars)
elif opts.task == 'plot':
    Plot_vars = config.get('Plot_general', 'List').split(',')

# Every task except 'prep' parses the sample information from the sample path.
if opts.task != 'prep':
    path = config.get("Directories", "samplepath")
    samplesinfo = config.get("Directories", "samplesinfo")
    info = ParseInfo(samplesinfo, path)

# Plots: one job per entry of the plot list.
if opts.task == 'plot':
    repDict['queue'] = 'all.q'
    for plot_item in Plot_vars:
        submit(plot_item, repDict)

# Dispatch of the remaining tasks: each branch submits its jobs through submit().
if opts.task == 'trainReg':
    repDict['queue'] = 'all.q'
    submit('trainReg',repDict)


elif opts.task == 'dc':
    repDict['queue'] = 'all.q'
    for item in DC_vars:
        # only the datacards of the requested mass, or all of them for '-M all'
        if 'ZH%s'%opts.mass in item or opts.mass == 'all':
            submit(item,repDict)

elif opts.task == 'prep':
    if ( opts.samples == ""):
        path = config.get("Directories","PREPin")
        samplesinfo = config.get("Directories","samplesinfo")
        info = ParseInfo(samplesinfo,path)
        for job in info:
            submit(job.name,repDict)

    else:
        for sample in samplesList:
            submit(sample,repDict)
elif opts.task == 'sys' or opts.task == 'syseval':
    path = config.get("Directories","SYSin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if (job.subsample):
                continue #avoid multiple submissions from subsamples
            # TO FIX FOR SPLITTED SAMPLE
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'eval':
    repDict['queue'] = 'long.q'
    path = config.get("Directories","MVAin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if (job.subsample):
                continue #avoid multiple submissions from subsamples
            if(info.checkSplittedSampleName(job.identifier)): # if multiple entries for one name  (splitted samples) use the identifier to submit
                print('@INFO: Splitted samples: submit through identifier')
                submit(job.identifier,repDict)
            else: submit(job.name,repDict)
    else:
        for sample in samplesList:
            print(sample)
            submit(sample,repDict)


elif( opts.task == 'split' ):
    path = config.get("Directories","SPLITin")
    samplesinfo = config.get("Directories","samplesinfo")
    # the default of -N is an int while submit() builds the command by
    # string concatenation: store the number of events as a string
    repDict['job_id']=str(opts.nevents_split)
    info = ParseInfo(samplesinfo,path)
    if ( opts.samples == "" ):
        for job in info:
            if (job.subsample): continue #avoid multiple submissions from subsamples
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

#BDT optimisation
elif opts.task == 'mva_opt':
    # NOTE(review): eval() executes whatever expression the Optimisation
    # section holds; only use it with trusted config files.
    scans = [ (par,eval(config.get('Optimisation',par))) for par in config.get('Optimisation','parameters').split(',') ]
    # one 'name=value' field per parameter, starting value first
    main_fields = [ par+'='+str(scan_par[0]) for par,scan_par in scans ]
    setting = ':'.join(main_fields)
    print(setting)
    repDict['additional']=setting
    repDict['job_id']=config.get('Optimisation','training')
    submit('OPT_main_set',repDict)

    #Scanning all the parameters found in the training config in the Optimisation sector
    for index,(par,scan_par) in enumerate(scans):
        print(par)
        # a scan needs (start, stop, number of steps); anything shorter is not scanned
        if len(scan_par) > 2 and scan_par[2] != 0:
            for step in range(scan_par[2]):
                # NOTE(review): with integer bounds this is an integer division
                # under Python 2 - confirm that this is intended
                value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
                print(value)
                # Replace only the field of the scanned parameter. A regex
                # on 'par.*?:' never matches the last parameter, which has
                # no trailing ':' in the setting string.
                fields = list(main_fields)
                fields[index] = par+'='+str(value)
                setting = ':'.join(fields)
                repDict['additional']=setting
                submit('OPT_'+par+str(value),repDict)
                print(setting)


# show the state of the queue after the submission
os.system('qstat')
if (opts.philipp_love_progress_bars):
    os.system('./qstat.py')
Revision:	1.39
Committed:	Mon Apr 8 08:02:00 2013 UTC (12 years, 1 month ago) by peller
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	lhcp_UnblindFix, hcp_Unblind, lhcp_11April, LHCP_PreAppFixAfterFreeze, HEAD
Changes since 1.38:	+37 -37 lines
Error occurred while calculating annotation data.
Log Message:	set default to 8TeV
#	Content
1	#! /usr/bin/env python
2	from optparse import OptionParser
3	import sys
4	import time
5	import os
6	import shutil
7
8	parser = OptionParser()
9	parser.add_option("-T", "--tag", dest="tag", default="8TeV",
10	help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
11	parser.add_option("-J", "--task", dest="task", default="",
12	help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
13	parser.add_option("-M", "--mass", dest="mass", default="125",
14	help="Mass for DC or Plots, 110...135")
15	parser.add_option("-S","--samples",dest="samples",default="",
16	help="samples you want to run on")
17	parser.add_option("-F", "--folderTag", dest="ftag", default="",
18	help="Creats a new folder structure for outputs or uses an existing one with the given name")
19	parser.add_option("-N", "--number-of-events", dest="nevents_split", default=100000,
20	help="Number of events per file when splitting.")
21	parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
22	help="If you share the love of Philipp...")
23
24	(opts, args) = parser.parse_args(sys.argv)
25
26	import os,shutil,pickle,subprocess,ROOT,re
27	ROOT.gROOT.SetBatch(True)
28	from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
29	import getpass
30
31	if opts.tag == "":
32	print "Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis."
33	sys.exit(123)
34
35	if opts.task == "":
36	print "Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards"
37	sys.exit(123)
38
39
40	en = opts.tag
41
42	#create the list with the samples to run over
43	samplesList=opts.samples.split(",")
44	timestamp = time.asctime().replace(' ','_').replace(':','-')
45
46	# the list of the config is taken from the path config
47	pathconfig = BetterConfigParser()
48	pathconfig.read('%sconfig/paths'%(en))
49	_configs = pathconfig.get('Configuration','List').split(" ")
50	configs = [ '%sconfig/'%(en) + c for c in _configs ]
51
52	if not opts.ftag == '':
53	tagDir = pathconfig.get('Directories','tagDir')
54	DirStruct={'tagDir':tagDir,'ftagdir':'%s/%s/'%(tagDir,opts.ftag),'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config') }
55
56	for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
57	try:
58	os.stat(DirStruct[keys])
59	except:
60	os.mkdir(DirStruct[keys])
61
62	pathfile = open('%sconfig/paths'%(en))
63	buffer = pathfile.readlines()
64	pathfile.close()
65	os.rename('%sconfig/paths'%(en),'%sconfig/paths.bkp'%(en))
66	pathfile = open('%sconfig/paths'%(en),'w')
67	for line in buffer:
68	if line.startswith('plotpath'):
69	line = 'plotpath: %s\n'%DirStruct['plotpath']
70	elif line.startswith('logpath'):
71	line = 'logpath: %s\n'%DirStruct['logpath']
72	elif line.startswith('limits'):
73	line = 'limits: %s\n'%DirStruct['limitpath']
74	pathfile.write(line)
75	pathfile.close()
76
77	#copy config files
78	for item in configs:
79	shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item.strip(en)))
80
81
82	print configs
83	config = BetterConfigParser()
84	config.read(configs)
85
86	def dump_config(configs,output_file):
87	"""
88	Dump all the configs in a output file
89	Args:
90	output_file: the file where the log will be dumped
91	configs: list of files (string) to be dumped
92	Returns:
93	nothing
94	"""
95	outf = open(output_file,'w')
96	for i in configs:
97	try:
98	f=open(i,'r')
99	outf.write(f.read())
100	except: print '@WARNING: Config' + i + ' not found. It will not be used.'
101
102	def compile_macro(config,macro):
103	"""
104	Creates the library from a macro using CINT compiling it in scratch to avoid
105	problems with the linking in the working nodes.
106	Args:
107	config: configuration file where the macro path is specified
108	macro: macro name to be compiled
109	Returns:
110	nothing
111	"""
112	submitDir = os.getcwd()
113	_macro=macro+'.h'
114	library = config.get(macro,'library')
115	libDir=os.path.dirname(library)
116	os.chdir(libDir)
117	if not os.path.exists(library):
118	print '@INFO: Compiling ' + _macro
119	scratchDir='/scratch/%s/'%(getpass.getuser())
120	shutil.copyfile(libDir+'/'+_macro,'/scratch/%s/%s'%(getpass.getuser(),_macro))
121	os.chdir(scratchDir)
122	ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
123	shutil.copyfile('/scratch/%s/%s'%(getpass.getuser(),os.path.basename(library)),library)
124	os.chdir(submitDir)
125
126	compile_macro(config,'BTagReshaping')
127	compile_macro(config,'VHbbNameSpace')
128
129	logPath = config.get("Directories","logpath")
130	logo = open('%s/data/submit.txt' %config.get('Directories','vhbbpath')).readlines()
131	counter = 0
132
133	#check if the logPath exist. If not exit
134	if( not os.path.isdir(logPath) ):
135	print '@ERROR : ' + logPath + ': dir not found.'
136	print '@ERROR : Create it before submitting '
137	print 'Exit'
138	sys.exit(-1)
139
140	repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
141	def submit(job,repDict):
142	global counter
143	repDict['job'] = job
144	nJob = counter % len(logo)
145	counter += 1
146	if opts.philipp_love_progress_bars:
147	repDict['name'] = '"%s"' %logo[nJob].strip()
148	else:
149	repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
150	command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + repDict['job_id'] + ' ' + repDict['additional']
151	print command
152	dump_config(configs,"%(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.config" %(repDict))
153	subprocess.call([command], shell=True)
154
155	if opts.task == 'train':
156	train_list = (config.get('MVALists','List_for_submitscript')).split(',')
157	print train_list
158	for item in train_list:
159	submit(item,repDict)
160
161
162	if opts.task == 'dc':
163	#DC_vars = config.items('Limit')
164	DC_vars= (config.get('LimitGeneral','List')).split(',')
165	print DC_vars
166
167	if opts.task == 'plot':
168	Plot_vars= (config.get('Plot_general','List')).split(',')
169
170	if not opts.task == 'prep':
171	path = config.get("Directories","samplepath")
172	samplesinfo = config.get("Directories","samplesinfo")
173	info = ParseInfo(samplesinfo,path)
174
175	if opts.task == 'plot':
176	repDict['queue'] = 'all.q'
177	for item in Plot_vars:
178	submit(item,repDict)
179
180	if opts.task == 'trainReg':
181	repDict['queue'] = 'all.q'
182	submit('trainReg',repDict)
183
184
185	elif opts.task == 'dc':
186	repDict['queue'] = 'all.q'
187	for item in DC_vars:
188	if 'ZH%s'%opts.mass in item:
189	submit(item,repDict)
190	elif opts.mass == 'all':
191	submit(item,repDict)
192
193	elif opts.task == 'prep':
194	if ( opts.samples == ""):
195	path = config.get("Directories","PREPin")
196	samplesinfo = config.get("Directories","samplesinfo")
197	info = ParseInfo(samplesinfo,path)
198	for job in info:
199	submit(job.name,repDict)
200
201	else:
202	for sample in samplesList:
203	submit(sample,repDict)
204	elif opts.task == 'sys' or opts.task == 'syseval':
205	path = config.get("Directories","SYSin")
206	samplesinfo = config.get("Directories","samplesinfo")
207	info = ParseInfo(samplesinfo,path)
208	if opts.samples == "":
209	for job in info:
210	if (job.subsample):
211	continue #avoid multiple submissions form subsamples
212	# TO FIX FOR SPLITTED SAMPLE
213	submit(job.name,repDict)
214	else:
215	for sample in samplesList:
216	submit(sample,repDict)
217
218	elif opts.task == 'eval':
219	repDict['queue'] = 'long.q'
220	path = config.get("Directories","MVAin")
221	samplesinfo = config.get("Directories","samplesinfo")
222	info = ParseInfo(samplesinfo,path)
223	if opts.samples == "":
224	for job in info:
225	if (job.subsample):
226	continue #avoid multiple submissions from subsamples
227	if(info.checkSplittedSampleName(job.identifier)): # if multiple entries for one name (splitted samples) use the identifier to submit
228	print '@INFO: Splitted samples: submit through identifier'
229	submit(job.identifier,repDict)
230	else: submit(job.name,repDict)
231	else:
232	for sample in samplesList:
233	print sample
234	submit(sample,repDict)
235
236
237	elif( opts.task == 'split' ):
238	path = config.get("Directories","SPLITin")
239	samplesinfo = config.get("Directories","samplesinfo")
240	repDict['job_id']=opts.nevents_split
241	info = ParseInfo(samplesinfo,path)
242	if ( opts.samples == "" ):
243	for job in info:
244	if (job.subsample): continue #avoid multiple submissions from subsamples
245	submit(job.name,repDict)
246	else:
247	for sample in samplesList:
248	submit(sample,repDict)
249
250	#BDT optimisation
251	elif opts.task == 'mva_opt':
252	total_number_of_steps=1
253	setting = ''
254	for par in (config.get('Optimisation','parameters').split(',')):
255	scan_par=eval(config.get('Optimisation',par))
256	setting+=par+'='+str(scan_par[0])+':'
257	if len(scan_par) > 1 and scan_par[2] != 0:
258	total_number_of_steps+=scan_par[2]
259	setting=setting[:-1] # eliminate last column at the end of the setting string
260	print setting
261	repDict['additional']=setting
262	repDict['job_id']=config.get('Optimisation','training')
263	submit('OPT_main_set',repDict)
264	main_setting=setting
265
266	#Scanning all the parameters found in the training config in the Optimisation sector
267	for par in (config.get('Optimisation','parameters').split(',')):
268	scan_par=eval(config.get('Optimisation',par))
269	print par
270	if len(scan_par) > 1 and scan_par[2] != 0:
271	for step in range(scan_par[2]):
272	value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
273	print value
274	setting=re.sub(par+'.*?:',par+'='+str(value)+':',main_setting)
275	repDict['additional']=setting
276	# repDict['job_id']=config.get('Optimisation','training')
277	submit('OPT_'+par+str(value),repDict)
278	# submit(config.get('Optimisation','training'),repDict)
279	print setting
280
281
282	os.system('qstat')
283	if (opts.philipp_love_progress_bars):
284	os.system('./qstat.py')