VHbb/python/submitThem.py

#! /usr/bin/env python
from optparse import OptionParser
import sys
import time
import os
import shutil

# Command line interface of the submission script.
parser = OptionParser()
parser.add_option("-T", "--tag", dest="tag", default="",
                      help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
parser.add_option("-J", "--task", dest="task", default="",
                      help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
parser.add_option("-M", "--mass", dest="mass", default="125",
                      help="Mass for DC or Plots, 110...135")
parser.add_option("-S","--samples",dest="samples",default="",
                      help="samples you want to run on")
parser.add_option("-F", "--folderTag", dest="ftag", default="",
                      help="Creats a new folder structure for outputs or uses an existing one with the given name")
# The default is a string, exactly like any value given on the command line
# for this untyped option: the value ends up concatenated into the qsub
# command line, which fails for an integer.
parser.add_option("-N", "--number-of-events", dest="nevents_split", default="100000",
                      help="Number of events per file when splitting.")
# NOTE: no action is set, so this option takes a value; any non-empty value
# (even "False") is true and switches the progress bars on.
parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
                      help="If you share the love of Philipp...")

# The complete sys.argv is parsed, so the script name ends up in args[0].
(opts, args) = parser.parse_args(sys.argv)

import os,shutil,pickle,subprocess,ROOT,re
ROOT.gROOT.SetBatch(True)
from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
import getpass

# The analysis tag and the task are both mandatory: stop with exit code 123
# and a usage hint when one of them is missing.
if not opts.tag:
    print("Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis.")
    sys.exit(123)

if not opts.task:
    print("Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards")
    sys.exit(123)

en = opts.tag

# Comma separated samples from the command line.
samplesList = opts.samples.split(",")
# Current time with blanks and colons replaced, used in the log file names.
timestamp = time.asctime().replace(' ', '_').replace(':', '-')

# The path config of the tag lists the config files (blank separated) that
# make up the full configuration; they all live in '<tag>config/'.
pathconfig = BetterConfigParser()
pathconfig.read('%sconfig/paths' % en)
_configs = pathconfig.get('Configuration', 'List').split(" ")
configs = []
for c in _configs:
    configs.append('%sconfig/' % en + c)

# With a folder tag the outputs go to <tagDir>/<ftag>/{Logs,Plots,Limits}:
# the directories are created if needed, the path config is rewritten to point
# to them and the config files are copied to <tagDir>/<ftag>/config.
if not opts.ftag == '':
    tagDir = pathconfig.get('Directories','tagDir')
    DirStruct = {
        'tagDir': tagDir,
        'ftagdir': '%s/%s/' % (tagDir, opts.ftag),
        'logpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Logs'),
        'plotpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Plots'),
        'limitpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Limits'),
        'confpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'config'),
    }

    # The order matters: parent directories come before their children.
    for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
        if not os.path.exists(DirStruct[keys]):
            os.mkdir(DirStruct[keys])

    pathsFile = '%sconfig/paths' % en
    with open(pathsFile) as pathfile:
        pathLines = pathfile.readlines()
    # Keep the previous path config as backup before rewriting it.
    os.rename(pathsFile, '%sconfig/paths.bkp' % en)
    with open(pathsFile, 'w') as pathfile:
        for line in pathLines:
            # Redirect the output locations, copy every other line unchanged.
            if line.startswith('plotpath'):
                line = 'plotpath: %s\n' % DirStruct['plotpath']
            elif line.startswith('logpath'):
                line = 'logpath: %s\n' % DirStruct['logpath']
            elif line.startswith('limits'):
                line = 'limits: %s\n' % DirStruct['limitpath']
            pathfile.write(line)

    #copy config files
    for item in configs:
        # Remove only the leading tag ('<tag>config/x' -> 'config/x').
        # str.strip(tag) would remove any of the tag's characters from both
        # ends and could eat the end of the file name.
        if item.startswith(en):
            relativeItem = item[len(en):]
        else:
            relativeItem = item
        shutil.copyfile(item, '%s/%s/%s' % (tagDir, opts.ftag, relativeItem))


# Show the list of config files in use, then load all of them into a single
# parser (done after the optional rewrite of the path config).
print(configs)
config = BetterConfigParser()
config.read(configs)


def compile_macro(config,macro):
    """
    Creates the library from a macro, compiling it with ROOT ('.L <macro>.h+')
    in the scratch area of the user to avoid problems with the linking in the
    working nodes, and copies the result to the configured location.
    Nothing is done when the library already exists.
    Args:
        config: configuration where the 'library' path of the macro is
                specified (section named like the macro)
        macro: macro name to be compiled; '<macro>.h' is expected next to the
               library
    Returns:
        nothing
    """
    submitDir = os.getcwd()
    _macro = macro + '.h'
    # Absolute path, so that a relative or bare library name keeps pointing to
    # the same file after the change of working directory below.
    library = os.path.abspath(config.get(macro,'library'))
    libDir = os.path.dirname(library)
    if os.path.exists(library):
        return

    print('@INFO: Compiling ' + _macro)
    scratchDir = '/scratch/%s/' % (getpass.getuser())
    if not os.path.isdir(scratchDir):
        os.makedirs(scratchDir)
    shutil.copyfile(os.path.join(libDir, _macro), scratchDir + _macro)
    try:
        os.chdir(scratchDir)
        ROOT.gROOT.ProcessLine('.L %s+' % (scratchDir + _macro))
        # Copy the compiled library from scratch back to its final place.
        shutil.copyfile(scratchDir + os.path.basename(library), library)
    finally:
        # Always go back to the submission directory, also after a failure.
        os.chdir(submitDir)
        
# Build the libraries used by the jobs before anything is submitted.
compile_macro(config,'BTagReshaping')
compile_macro(config,'VHbbNameSpace')

logPath = config.get("Directories","logpath")
# The lines of this file are used as job names when -P is given; the file is
# closed as soon as it has been read.
with open('%s/data/submit.txt' % config.get('Directories','vhbbpath')) as logoFile:
    logo = logoFile.readlines()
# Number of jobs submitted so far (updated by submit).
counter = 0

#check if the logPath exist. If not exit
if not os.path.isdir(logPath):
    print('@ERROR : ' + logPath + ': dir not found.')
    print('@ERROR : Create it before submitting ')
    print('Exit')
    sys.exit(-1)


# Values used to build the qsub command line; 'job' is filled by submit,
# 'queue', 'job_id' and 'additional' are adjusted by some of the tasks.
repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
def submit(job,repDict):
    """
    Submits one job to the batch system with qsub. The job runs runAll.sh with
    the arguments: job, tag, task, job_id and additional.
    Args:
        job: name of the job, passed to runAll.sh and used in the job name and
             in the names of the log files
        repDict: values used to build the qsub command ('queue', 'logpath',
                 'timestamp', 'en', 'task', 'job_id', 'additional'); its 'job'
                 and 'name' entries are overwritten by this function
    Returns:
        nothing
    """
    global counter
    repDict['job'] = job
    jobNumber = counter
    counter += 1
    # The logo lines are only touched when requested and available, so an
    # empty logo file cannot cause a division by zero.
    if opts.philipp_love_progress_bars and logo:
        repDict['name'] = '"%s"' % logo[jobNumber % len(logo)].strip()
    else:
        repDict['name'] = '%(job)s_%(en)s%(task)s' % repDict
    qsubPart = ('qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s '
                '-o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out '
                '-e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err '
                'runAll.sh %(job)s %(en)s ') % repDict
    # str() because job_id may be a number (e.g. the events per file of the
    # split task), which cannot be concatenated to a string directly.
    command = qsubPart + opts.task + ' ' + str(repDict['job_id']) + ' ' + str(repDict['additional'])
    print(command)
    subprocess.call(command, shell=True)

# Tasks whose list of jobs is read directly from the configuration. The
# trainings are submitted right away; for datacards and plots only the list is
# loaded here.
if opts.task == 'train':
    train_list = config.get('MVALists', 'List_for_submitscript').split(',')
    print(train_list)
    for item in train_list:
        submit(item, repDict)
elif opts.task == 'dc':
    DC_vars = config.get('LimitGeneral', 'List').split(',')
    print(DC_vars)
elif opts.task == 'plot':
    Plot_vars = config.get('Plot_general', 'List').split(',')

# Every task but 'prep' loads the sample information from the sample path.
if opts.task != 'prep':
    path = config.get("Directories", "samplepath")
    samplesinfo = config.get("Directories", "samplesinfo")
    info = ParseInfo(samplesinfo, path)

# Submit the jobs of the requested task. For the sample based tasks the
# samples come from -S if given, otherwise from the samples info file.
if opts.task == 'plot':
    repDict['queue'] = 'all.q'
    for item in Plot_vars:
        submit(item,repDict)


elif opts.task == 'dc':
    repDict['queue'] = 'short.q'
    for item in DC_vars:
        # Only the ZH entries of the requested mass, or every ZH entry with -M all.
        if 'ZH%s'%opts.mass in item:
            submit(item,repDict)
        elif 'ZH' in item and opts.mass == 'all':
            submit(item,repDict)

elif opts.task == 'prep':
    if opts.samples == "":
        path = config.get("Directories","PREPin")
        samplesinfo = config.get("Directories","samplesinfo")
        info = ParseInfo(samplesinfo,path)
        for job in info:
            submit(job.name,repDict)

    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'sys' or opts.task == 'syseval':
    path = config.get("Directories","SYSin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            # TO FIX FOR SPLITTED SAMPLE
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'eval':
    path = config.get("Directories","MVAin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            if info.checkSplittedSampleName(job.identifier): # if multiple entries for one name  (splitted samples) use the identifier to submit
                print('@INFO: Splitted samples: submit through identifier')
                submit(job.identifier,repDict)
            else:
                submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)


elif opts.task == 'split':
    path = config.get("Directories","SPLITin")
    samplesinfo = config.get("Directories","samplesinfo")
    # The events per file go to runAll.sh through the qsub command line, so
    # they are stored as a string.
    repDict['job_id'] = str(opts.nevents_split)
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

#BDT optimisation
elif opts.task == 'mva_opt':
    # Every parameter listed in the Optimisation section has an entry that
    # evaluates to a sequence: [start] or [start, stop, steps].
    # NOTE(review): eval() executes whatever the configuration contains; use
    # trusted config files only.
    opt_parameters = config.get('Optimisation','parameters').split(',')
    opt_scans = []
    for opt_name in opt_parameters:
        opt_scans.append(eval(config.get('Optimisation',opt_name)))

    total_number_of_steps = 1
    main_values = []
    for scan_par in opt_scans:
        main_values.append(scan_par[0])
        # scan_par[2] exists only for sequences of at least three elements
        if len(scan_par) > 2 and scan_par[2] != 0:
            total_number_of_steps += scan_par[2]

    # Main setting: every parameter at its start value, 'p1=v1:p2=v2:...'
    main_setting = ':'.join([opt_name+'='+str(opt_value) for opt_name, opt_value in zip(opt_parameters, main_values)])
    setting = main_setting
    print(setting)
    repDict['additional'] = setting
    repDict['job_id'] = config.get('Optimisation','training')
    submit('OPT_main_set',repDict)

    #Scanning all the parameters found in the training config in the Optimisation sector
    for scan_index, par in enumerate(opt_parameters):
        scan_par = opt_scans[scan_index]
        print(par)
        if len(scan_par) > 2 and scan_par[2] != 0:
            for step in range(scan_par[2]):
                # NOTE(review): with integer start/stop/steps this is an
                # integer division under Python 2 - confirm this is intended.
                value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
                print(value)
                # Rebuild the setting with only this parameter changed. A
                # substitution on the main setting string never matched the
                # last parameter, which has no ':' behind it.
                scan_values = list(main_values)
                scan_values[scan_index] = value
                setting = ':'.join([opt_name+'='+str(opt_value) for opt_name, opt_value in zip(opt_parameters, scan_values)])
                repDict['additional'] = setting
                submit('OPT_'+par+str(value),repDict)
                print(setting)


# After the submission show the state of the batch queue, followed by the
# progress bar script when it was requested with -P.
status_commands = ['qstat']
if opts.philipp_love_progress_bars:
    status_commands.append('./qstat.py')
for status_command in status_commands:
    os.system(status_command)
Revision:	1.35
Committed:	Thu Mar 21 14:18:17 2013 UTC (12 years, 1 month ago) by bortigno
Content type:	text/x-python
Branch:	MAIN
Changes since 1.34:	+33 -13 lines
Log Message:	@FIX + ADD: list of the config from the path config. (not hardcoded anymore). Add function to compile macro from scratch (as meister suggests) + copy back.
#	Content
1	#! /usr/bin/env python
2	from optparse import OptionParser
3	import sys
4	import time
5	import os
6	import shutil
7
8	parser = OptionParser()
9	parser.add_option("-T", "--tag", dest="tag", default="",
10	help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
11	parser.add_option("-J", "--task", dest="task", default="",
12	help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
13	parser.add_option("-M", "--mass", dest="mass", default="125",
14	help="Mass for DC or Plots, 110...135")
15	parser.add_option("-S","--samples",dest="samples",default="",
16	help="samples you want to run on")
17	parser.add_option("-F", "--folderTag", dest="ftag", default="",
18	help="Creats a new folder structure for outputs or uses an existing one with the given name")
19	parser.add_option("-N", "--number-of-events", dest="nevents_split", default=100000,
20	help="Number of events per file when splitting.")
21	parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
22	help="If you share the love of Philipp...")
23
24	(opts, args) = parser.parse_args(sys.argv)
25
26	import os,shutil,pickle,subprocess,ROOT,re
27	ROOT.gROOT.SetBatch(True)
28	from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
29	import getpass
30
31	if opts.tag == "":
32	print "Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis."
33	sys.exit(123)
34
35	if opts.task == "":
36	print "Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards"
37	sys.exit(123)
38
39
40	en = opts.tag
41
42	#create the list with the samples to run over
43	samplesList=opts.samples.split(",")
44	timestamp = time.asctime().replace(' ','_').replace(':','-')
45
46	# the list of the config is taken from the path config
47	pathconfig = BetterConfigParser()
48	pathconfig.read('%sconfig/paths'%(en))
49	_configs = pathconfig.get('Configuration','List').split(" ")
50	configs = [ '%sconfig/'%(en) + c for c in _configs ]
51
52	if not opts.ftag == '':
53	tagDir = pathconfig.get('Directories','tagDir')
54	DirStruct={'tagDir':tagDir,'ftagdir':'%s/%s/'%(tagDir,opts.ftag),'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config') }
55
56	for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
57	try:
58	os.stat(DirStruct[keys])
59	except:
60	os.mkdir(DirStruct[keys])
61
62	pathfile = open('%sconfig/paths'%(en))
63	buffer = pathfile.readlines()
64	pathfile.close()
65	os.rename('%sconfig/paths'%(en),'%sconfig/paths.bkp'%(en))
66	pathfile = open('%sconfig/paths'%(en),'w')
67	for line in buffer:
68	if line.startswith('plotpath'):
69	line = 'plotpath: %s\n'%DirStruct['plotpath']
70	elif line.startswith('logpath'):
71	line = 'logpath: %s\n'%DirStruct['logpath']
72	elif line.startswith('limits'):
73	line = 'limits: %s\n'%DirStruct['limitpath']
74	pathfile.write(line)
75	pathfile.close()
76
77	#copy config files
78	for item in configs:
79	shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item.strip(en)))
80
81
82	print configs
83	config = BetterConfigParser()
84	config.read(configs)
85
86
87	def compile_macro(config,macro):
88	"""
89	Creates the library from a macro using CINT compiling it in scratch to avoid
90	problems with the linking in the working nodes.
91	Args:
92	config: configuration file where the macro path is specified
93	macro: macro name to be compiled
94	Returns:
95	nothing
96	"""
97	submitDir = os.getcwd()
98	_macro=macro+'.h'
99	library = config.get(macro,'library')
100	libDir=os.path.dirname(library)
101	os.chdir(libDir)
102	if not os.path.exists(library):
103	print '@INFO: Compiling ' + _macro
104	scratchDir='/scratch/%s/'%(getpass.getuser())
105	shutil.copyfile(libDir+'/'+_macro,'/scratch/%s/%s'%(getpass.getuser(),_macro))
106	os.chdir(scratchDir)
107	ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
108	shutil.copyfile('/scratch/%s/%s'%(getpass.getuser(),os.path.basename(library)),library)
109	os.chdir(submitDir)
110
111	compile_macro(config,'BTagReshaping')
112	compile_macro(config,'VHbbNameSpace')
113
114	logPath = config.get("Directories","logpath")
115	logo = open('%s/data/submit.txt' %config.get('Directories','vhbbpath')).readlines()
116	counter = 0
117
118	#check if the logPath exist. If not exit
119	if( not os.path.isdir(logPath) ):
120	print '@ERROR : ' + logPath + ': dir not found.'
121	print '@ERROR : Create it before submitting '
122	print 'Exit'
123	sys.exit(-1)
124
125
126	repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
127	def submit(job,repDict):
128	global counter
129	repDict['job'] = job
130	nJob = counter % len(logo)
131	counter += 1
132	if opts.philipp_love_progress_bars:
133	repDict['name'] = '"%s"' %logo[nJob].strip()
134	else:
135	repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
136	command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + repDict['job_id'] + ' ' + repDict['additional']
137	print command
138	subprocess.call([command], shell=True)
139
140	if opts.task == 'train':
141	train_list = (config.get('MVALists','List_for_submitscript')).split(',')
142	print train_list
143	for item in train_list:
144	submit(item,repDict)
145
146
147	if opts.task == 'dc':
148	#DC_vars = config.items('Limit')
149	DC_vars= (config.get('LimitGeneral','List')).split(',')
150	print DC_vars
151
152	if opts.task == 'plot':
153	Plot_vars= (config.get('Plot_general','List')).split(',')
154
155	if not opts.task == 'prep':
156	path = config.get("Directories","samplepath")
157	samplesinfo = config.get("Directories","samplesinfo")
158	info = ParseInfo(samplesinfo,path)
159
160	if opts.task == 'plot':
161	repDict['queue'] = 'all.q'
162	for item in Plot_vars:
163	submit(item,repDict)
164
165
166	elif opts.task == 'dc':
167	repDict['queue'] = 'short.q'
168	for item in DC_vars:
169	if 'ZH%s'%opts.mass in item:
170	submit(item,repDict)
171	elif 'ZH' in item and opts.mass == 'all':
172	submit(item,repDict)
173
174	elif opts.task == 'prep':
175	if ( opts.samples == ""):
176	path = config.get("Directories","PREPin")
177	samplesinfo = config.get("Directories","samplesinfo")
178	info = ParseInfo(samplesinfo,path)
179	for job in info:
180	submit(job.name,repDict)
181
182	else:
183	for sample in samplesList:
184	submit(sample,repDict)
185	elif opts.task == 'sys' or opts.task == 'syseval':
186	path = config.get("Directories","SYSin")
187	samplesinfo = config.get("Directories","samplesinfo")
188	info = ParseInfo(samplesinfo,path)
189	if ( opts.samples == ""):
190	for job in info:
191	if (job.subsample):
192	continue #avoid multiple submissions form subsamples
193	# TO FIX FOR SPLITTED SAMPLE
194	submit(job.name,repDict)
195	else:
196	for sample in samplesList:
197	submit(sample,repDict)
198
199	elif opts.task == 'eval':
200	path = config.get("Directories","MVAin")
201	samplesinfo = config.get("Directories","samplesinfo")
202	info = ParseInfo(samplesinfo,path)
203	if ( opts.samples == ""):
204	for job in info:
205	if (job.subsample):
206	continue #avoid multiple submissions from subsamples
207	if(info.checkSplittedSampleName(job.identifier)): # if multiple entries for one name (splitted samples) use the identifier to submit
208	print '@INFO: Splitted samples: submit through identifier'
209	submit(job.identifier,repDict)
210	else: submit(job.name,repDict)
211	else:
212	for sample in samplesList:
213	submit(sample,repDict)
214
215
216	elif( opts.task == 'split' ):
217	path = config.get("Directories","SPLITin")
218	samplesinfo = config.get("Directories","samplesinfo")
219	repDict['job_id']=opts.nevents_split
220	info = ParseInfo(samplesinfo,path)
221	if ( opts.samples == "" ):
222	for job in info:
223	if (job.subsample): continue #avoid multiple submissions from subsamples
224	submit(job.name,repDict)
225	else:
226	for sample in samplesList:
227	submit(sample,repDict)
228
229	#BDT optimisation
230	elif opts.task == 'mva_opt':
231	total_number_of_steps=1
232	setting = ''
233	for par in (config.get('Optimisation','parameters').split(',')):
234	scan_par=eval(config.get('Optimisation',par))
235	setting+=par+'='+str(scan_par[0])+':'
236	if len(scan_par) > 1 and scan_par[2] != 0:
237	total_number_of_steps+=scan_par[2]
238	setting=setting[:-1] # eliminate last column at the end of the setting string
239	print setting
240	repDict['additional']=setting
241	repDict['job_id']=config.get('Optimisation','training')
242	submit('OPT_main_set',repDict)
243	main_setting=setting
244
245	#Scanning all the parameters found in the training config in the Optimisation sector
246	for par in (config.get('Optimisation','parameters').split(',')):
247	scan_par=eval(config.get('Optimisation',par))
248	print par
249	if len(scan_par) > 1 and scan_par[2] != 0:
250	for step in range(scan_par[2]):
251	value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
252	print value
253	setting=re.sub(par+'.*?:',par+'='+str(value)+':',main_setting)
254	repDict['additional']=setting
255	# repDict['job_id']=config.get('Optimisation','training')
256	submit('OPT_'+par+str(value),repDict)
257	# submit(config.get('Optimisation','training'),repDict)
258	print setting
259
260
261	os.system('qstat')
262	if (opts.philipp_love_progress_bars):
263	os.system('./qstat.py')