VHbb/python/submitThem.py

#! /usr/bin/env python
from optparse import OptionParser
import sys
import time
import os
import shutil

# Command-line interface of the submission script.
# None of the options declares a type or an action, so optparse stores every
# value given on the command line as a plain string.
parser = OptionParser()
parser.add_option("-T", "--tag", dest="tag", default="",
                      help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
parser.add_option("-J", "--task", dest="task", default="",
                      help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
parser.add_option("-M", "--mass", dest="mass", default="125",
                      help="Mass for DC or Plots, 110...135")
parser.add_option("-S","--samples",dest="samples",default="",
                      help="samples you want to run on")
parser.add_option("-F", "--folderTag", dest="ftag", default="",
                      help="Creats a new folder structure for outputs or uses an existing one with the given name")
# The default is a string on purpose: the value is concatenated verbatim into
# the qsub command line, and a value passed with -N arrives as a string too.
# An int default made '-J split' without -N fail with a TypeError.
parser.add_option("-N", "--number-of-events", dest="nevents_split", default="100000",
                      help="Number of events per file when splitting.")
# NOTE: no action is given, so this option expects a value (e.g. '-P 1').
# The script only tests the value for truthiness, hence any non-empty string,
# including 'False', switches the feature on.
parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
                      help="If you share the love of Philipp...")

# sys.argv is passed in full, so args[0] is the script name.
(opts, args) = parser.parse_args(sys.argv)

import os,shutil,pickle,subprocess,ROOT,re
ROOT.gROOT.SetBatch(True)
from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
import getpass

# A tag and a task are both mandatory; leave with exit code 123 otherwise.
if opts.tag == "":
    print("Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis.")
    sys.exit(123)

if opts.task == "":
    print("Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards")
    sys.exit(123)

# The tag doubles as the prefix of the configuration directory ('<tag>config/').
en = opts.tag

# Samples explicitly requested on the command line (comma separated).
samplesList = opts.samples.split(",")

# Timestamp without blanks or colons, used in the names of the log files.
timestamp = time.asctime().replace(' ', '_').replace(':', '-')

# The 'paths' config names every other config file to load.
pathconfig = BetterConfigParser()
pathconfig.read('%sconfig/paths' % en)
_configs = pathconfig.get('Configuration', 'List').split(" ")
configs = []
for c in _configs:
    configs.append('%sconfig/' % en + c)

# With a folder tag (-F) the outputs go to a private area
# <tagDir>/<ftag>/{Logs,Plots,Limits,config}: the directories are created when
# missing, the 'paths' config is rewritten to point at them (the previous
# version is kept as 'paths.bkp') and the config files are copied alongside.
if opts.ftag != '':
    tagDir = pathconfig.get('Directories','tagDir')
    DirStruct={'tagDir':tagDir,'ftagdir':'%s/%s/'%(tagDir,opts.ftag),'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config') }

    # Order matters: parent directories are listed before their children.
    for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
        if not os.path.exists(DirStruct[keys]):
            os.mkdir(DirStruct[keys])

    paths_file = '%sconfig/paths'%(en)
    with open(paths_file) as pathfile:
        path_lines = pathfile.readlines()
    os.rename(paths_file,'%sconfig/paths.bkp'%(en))
    with open(paths_file,'w') as pathfile:
        for line in path_lines:
            if line.startswith('plotpath'):
                line = 'plotpath: %s\n'%DirStruct['plotpath']
            elif line.startswith('logpath'):
                line = 'logpath: %s\n'%DirStruct['logpath']
            elif line.startswith('limits'):
                line = 'limits: %s\n'%DirStruct['limitpath']
            pathfile.write(line)

    #copy config files
    for item in configs:
        # Every entry is '<tag>config/<name>'; drop exactly the leading tag.
        # str.strip(en) would remove any of the tag's characters from both
        # ends and could therefore mangle the end of the file name.
        shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item[len(en):]))


# The full configuration is read only now, i.e. after the optional rewrite of
# the paths file above, so if that file is among `configs` the new locations
# are the ones picked up here.
print(configs)
config = BetterConfigParser()
config.read(configs)

def dump_config(configs,output_file):
    """
    Dump all the configs in a output file
    The files are concatenated in the given order. A file that cannot be
    read is reported with a warning and skipped; the others are still dumped.
    Args:
        output_file: the file where the log will be dumped (overwritten)
        configs: list of files (string) to be dumped
    Returns:
        nothing
    """
    # Context managers make sure the dump is flushed and every handle is
    # closed, also when something fails half way through.
    with open(output_file,'w') as outf:
        for i in configs:
            try:
                with open(i,'r') as f:
                    outf.write(f.read())
            except IOError:
                # Best effort: a missing config must not stop the submission.
                print('@WARNING: Config' + i + ' not found. It will not be used.')

def compile_macro(config,macro):
    """
    Creates the library from a macro, compiling it in scratch to avoid
    problems with the linking in the working nodes.
    Nothing is compiled when the library file already exists. Otherwise the
    header '<macro>.h' found next to the library is copied to
    '/scratch/<user>/', loaded there through ROOT with '.L <file>+' and the
    resulting library is copied back to its configured location.
    The working directory of the caller is restored in every case.
    Args:
        config: configuration object; config.get(macro,'library') gives the library path
        macro: macro name to be compiled
    Returns:
        nothing
    """
    submitDir = os.getcwd()
    _macro=macro+'.h'
    # Resolve the path once, before any chdir, so that a relative entry keeps
    # pointing at the same file for the existence check and the final copy.
    library = os.path.abspath(config.get(macro,'library'))
    libDir=os.path.dirname(library)
    try:
        os.chdir(libDir)
        if not os.path.exists(library):
            print('@INFO: Compiling ' + _macro)
            scratchDir='/scratch/%s/'%(getpass.getuser())
            shutil.copyfile(os.path.join(libDir,_macro),scratchDir+_macro)
            os.chdir(scratchDir)
            ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
            shutil.copyfile(scratchDir+os.path.basename(library),library)
    finally:
        # Go back to the submission directory even if a step above failed.
        os.chdir(submitDir)
        
# Make sure the helper libraries exist before any job is submitted.
for _macro_name in ('BTagReshaping', 'VHbbNameSpace'):
    compile_macro(config, _macro_name)

logPath = config.get("Directories","logpath")
# Lines of submit.txt are used as job names when the -P option is set.
with open('%s/data/submit.txt' %config.get('Directories','vhbbpath')) as _logo_file:
    logo = _logo_file.readlines()
# Number of jobs submitted so far (updated by submit()).
counter = 0

#check if the logPath exist. If not exit
if not os.path.isdir(logPath):
    print('@ERROR : ' + logPath + ': dir not found.')
    print('@ERROR : Create it before submitting ')
    print('Exit')
    sys.exit(-1)


# Fields substituted into the qsub command line; 'job', 'job_id' and
# 'additional' are filled in per submission.
repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
def submit(job,repDict):
    """
    Submit one job to the batch system through qsub.
    The command runs 'runAll.sh <job> <en> <task> <job_id> <additional>' on
    the queue given in repDict, with stdout/stderr redirected to
    '<logpath>/<timestamp>_<job>_<en>_<task>.out/.err'. Before submitting,
    the current configuration files are dumped next to the logs as
    '<logpath>/<timestamp>_<job>_<en>_<task>.config'.
    Args:
        job: name of the job (sample, training, region, ...) to run
        repDict: dictionary with the fields of the command ('en', 'logpath',
                 'task', 'queue', 'timestamp', 'additional', 'job_id').
                 Its 'job' and 'name' entries are overwritten here.
    Returns:
        nothing
    """
    global counter
    repDict['job'] = job
    if opts.philipp_love_progress_bars:
        # Cycle through the lines of the logo file; the index is only
        # computed here so that an empty file is harmless when unused.
        repDict['name'] = '"%s"' %logo[counter % len(logo)].strip()
    else:
        repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
    counter += 1
    # job_id may not be a string (e.g. a numeric number of events), so it is
    # converted explicitly before being appended to the command.
    command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + str(repDict['job_id']) + ' ' + repDict['additional']
    print(command)
    dump_config(configs,"%(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.config" %(repDict))
    subprocess.call(command, shell=True)

# MVA training: one job per entry of the configured training list.
if opts.task == 'train':
    train_list = config.get('MVALists', 'List_for_submitscript').split(',')
    print(train_list)
    for training in train_list:
        submit(training, repDict)


# Lists that the datacard and plot submissions iterate over.
if opts.task == 'dc':
    DC_vars = config.get('LimitGeneral', 'List').split(',')
    print(DC_vars)
elif opts.task == 'plot':
    Plot_vars = config.get('Plot_general', 'List').split(',')

# Every task except 'prep' parses the sample information from 'samplepath'.
if opts.task != 'prep':
    path = config.get("Directories", "samplepath")
    samplesinfo = config.get("Directories", "samplesinfo")
    info = ParseInfo(samplesinfo, path)

# Plots: one job per entry of the plot list.
if opts.task == 'plot':
    repDict['queue'] = 'all.q'
    for plot_item in Plot_vars:
        submit(plot_item, repDict)

# Dispatch of the remaining tasks. For the sample based tasks the jobs are
# either the samples given with -S or, when -S is empty, every sample found
# by ParseInfo in the input directory of the task.
if opts.task == 'trainReg':
    repDict['queue'] = 'all.q'
    submit('trainReg',repDict)


elif opts.task == 'dc':
    repDict['queue'] = 'short.q'
    for item in DC_vars:
        # Only the datacards of the requested mass, or all the ZH ones
        # when the mass is 'all'.
        if 'ZH%s'%opts.mass in item or ('ZH' in item and opts.mass == 'all'):
            submit(item,repDict)

elif opts.task == 'prep':
    if opts.samples == "":
        path = config.get("Directories","PREPin")
        samplesinfo = config.get("Directories","samplesinfo")
        info = ParseInfo(samplesinfo,path)
        for job in info:
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'sys' or opts.task == 'syseval':
    path = config.get("Directories","SYSin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            # TO FIX FOR SPLITTED SAMPLE
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'eval':
    path = config.get("Directories","MVAin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            if info.checkSplittedSampleName(job.identifier):
                # if multiple entries for one name (splitted samples) use the identifier to submit
                print('@INFO: Splitted samples: submit through identifier')
                submit(job.identifier,repDict)
            else:
                submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)


elif opts.task == 'split':
    path = config.get("Directories","SPLITin")
    samplesinfo = config.get("Directories","samplesinfo")
    # The number of events travels as the job_id field of the command line,
    # which is assembled by string concatenation: force it to a string.
    repDict['job_id']=str(opts.nevents_split)
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

#BDT optimisation
elif opts.task == 'mva_opt':
    # Every parameter listed in [Optimisation] 'parameters' has an entry that
    # is read below as a sequence (start, stop, number_of_steps); an entry
    # with fewer than three values, or with zero steps, is not scanned.
    total_number_of_steps=1
    opt_parameters = config.get('Optimisation','parameters').split(',')
    scan_ranges = {}
    main_values = []   # ordered (parameter, start value as string) pairs
    for par in opt_parameters:
        # NOTE(review): eval() executes whatever the config entry contains;
        # acceptable only as long as the config files are trusted.
        scan_par=eval(config.get('Optimisation',par))
        scan_ranges[par]=scan_par
        main_values.append((par,str(scan_par[0])))
        # scan_par[2] is read, so at least three entries are required
        if len(scan_par) > 2 and scan_par[2] != 0:
            total_number_of_steps+=scan_par[2]
    # main setting: every parameter at its start value, 'a=1:b=2:...'
    setting=':'.join('%s=%s'%pair for pair in main_values)
    print(setting)
    repDict['additional']=setting
    repDict['job_id']=config.get('Optimisation','training')
    submit('OPT_main_set',repDict)
    main_setting=setting

    #Scanning all the parameters found in the training config in the Optimisation sector
    for par in opt_parameters:
        scan_par=scan_ranges[par]
        print(par)
        if len(scan_par) > 2 and scan_par[2] != 0:
            for step in range(scan_par[2]):
                # NOTE(review): with integer entries this division truncates
                # under Python 2 - confirm that integer steps are intended.
                value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
                print(value)
                # Rebuild the setting with only this parameter changed. The
                # former regex substitution needed a ':' after the parameter
                # and so never modified the last one of the list.
                setting=':'.join('%s=%s'%(name,value if name == par else start) for name,start in main_values)
                repDict['additional']=setting
                submit('OPT_'+par+str(value),repDict)
                print(setting)


# Show the state of the queue once everything has been submitted.
os.system('qstat')
if opts.philipp_love_progress_bars:
    os.system('./qstat.py')
Revision:	1.37
Committed:	Wed Mar 27 14:29:38 2013 UTC (12 years, 1 month ago) by bortigno
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	LHCP_PreAppFreeze
Changes since 1.36:	+16 -0 lines
Log Message:	@ADD: Dump of log in log file
#	Content
1	#! /usr/bin/env python
2	from optparse import OptionParser
3	import sys
4	import time
5	import os
6	import shutil
7
8	parser = OptionParser()
9	parser.add_option("-T", "--tag", dest="tag", default="",
10	help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
11	parser.add_option("-J", "--task", dest="task", default="",
12	help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
13	parser.add_option("-M", "--mass", dest="mass", default="125",
14	help="Mass for DC or Plots, 110...135")
15	parser.add_option("-S","--samples",dest="samples",default="",
16	help="samples you want to run on")
17	parser.add_option("-F", "--folderTag", dest="ftag", default="",
18	help="Creats a new folder structure for outputs or uses an existing one with the given name")
19	parser.add_option("-N", "--number-of-events", dest="nevents_split", default=100000,
20	help="Number of events per file when splitting.")
21	parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
22	help="If you share the love of Philipp...")
23
24	(opts, args) = parser.parse_args(sys.argv)
25
26	import os,shutil,pickle,subprocess,ROOT,re
27	ROOT.gROOT.SetBatch(True)
28	from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
29	import getpass
30
31	if opts.tag == "":
32	print "Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis."
33	sys.exit(123)
34
35	if opts.task == "":
36	print "Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards"
37	sys.exit(123)
38
39
40	en = opts.tag
41
42	#create the list with the samples to run over
43	samplesList=opts.samples.split(",")
44	timestamp = time.asctime().replace(' ','_').replace(':','-')
45
46	# the list of the config is taken from the path config
47	pathconfig = BetterConfigParser()
48	pathconfig.read('%sconfig/paths'%(en))
49	_configs = pathconfig.get('Configuration','List').split(" ")
50	configs = [ '%sconfig/'%(en) + c for c in _configs ]
51
52	if not opts.ftag == '':
53	tagDir = pathconfig.get('Directories','tagDir')
54	DirStruct={'tagDir':tagDir,'ftagdir':'%s/%s/'%(tagDir,opts.ftag),'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config') }
55
56	for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
57	try:
58	os.stat(DirStruct[keys])
59	except:
60	os.mkdir(DirStruct[keys])
61
62	pathfile = open('%sconfig/paths'%(en))
63	buffer = pathfile.readlines()
64	pathfile.close()
65	os.rename('%sconfig/paths'%(en),'%sconfig/paths.bkp'%(en))
66	pathfile = open('%sconfig/paths'%(en),'w')
67	for line in buffer:
68	if line.startswith('plotpath'):
69	line = 'plotpath: %s\n'%DirStruct['plotpath']
70	elif line.startswith('logpath'):
71	line = 'logpath: %s\n'%DirStruct['logpath']
72	elif line.startswith('limits'):
73	line = 'limits: %s\n'%DirStruct['limitpath']
74	pathfile.write(line)
75	pathfile.close()
76
77	#copy config files
78	for item in configs:
79	shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item.strip(en)))
80
81
82	print configs
83	config = BetterConfigParser()
84	config.read(configs)
85
86	def dump_config(configs,output_file):
87	"""
88	Dump all the configs in a output file
89	Args:
90	output_file: the file where the log will be dumped
91	configs: list of files (string) to be dumped
92	Returns:
93	nothing
94	"""
95	outf = open(output_file,'w')
96	for i in configs:
97	try:
98	f=open(i,'r')
99	outf.write(f.read())
100	except: print '@WARNING: Config' + i + ' not found. It will not be used.'
101
102	def compile_macro(config,macro):
103	"""
104	Creates the library from a macro using CINT compiling it in scratch to avoid
105	problems with the linking in the working nodes.
106	Args:
107	config: configuration file where the macro path is specified
108	macro: macro name to be compiled
109	Returns:
110	nothing
111	"""
112	submitDir = os.getcwd()
113	_macro=macro+'.h'
114	library = config.get(macro,'library')
115	libDir=os.path.dirname(library)
116	os.chdir(libDir)
117	if not os.path.exists(library):
118	print '@INFO: Compiling ' + _macro
119	scratchDir='/scratch/%s/'%(getpass.getuser())
120	shutil.copyfile(libDir+'/'+_macro,'/scratch/%s/%s'%(getpass.getuser(),_macro))
121	os.chdir(scratchDir)
122	ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
123	shutil.copyfile('/scratch/%s/%s'%(getpass.getuser(),os.path.basename(library)),library)
124	os.chdir(submitDir)
125
126	compile_macro(config,'BTagReshaping')
127	compile_macro(config,'VHbbNameSpace')
128
129	logPath = config.get("Directories","logpath")
130	logo = open('%s/data/submit.txt' %config.get('Directories','vhbbpath')).readlines()
131	counter = 0
132
133	#check if the logPath exist. If not exit
134	if( not os.path.isdir(logPath) ):
135	print '@ERROR : ' + logPath + ': dir not found.'
136	print '@ERROR : Create it before submitting '
137	print 'Exit'
138	sys.exit(-1)
139
140
141	repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
142	def submit(job,repDict):
143	global counter
144	repDict['job'] = job
145	nJob = counter % len(logo)
146	counter += 1
147	if opts.philipp_love_progress_bars:
148	repDict['name'] = '"%s"' %logo[nJob].strip()
149	else:
150	repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
151	command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + repDict['job_id'] + ' ' + repDict['additional']
152	print command
153	dump_config(configs,"%(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.config" %(repDict))
154	subprocess.call([command], shell=True)
155
156	if opts.task == 'train':
157	train_list = (config.get('MVALists','List_for_submitscript')).split(',')
158	print train_list
159	for item in train_list:
160	submit(item,repDict)
161
162
163	if opts.task == 'dc':
164	#DC_vars = config.items('Limit')
165	DC_vars= (config.get('LimitGeneral','List')).split(',')
166	print DC_vars
167
168	if opts.task == 'plot':
169	Plot_vars= (config.get('Plot_general','List')).split(',')
170
171	if not opts.task == 'prep':
172	path = config.get("Directories","samplepath")
173	samplesinfo = config.get("Directories","samplesinfo")
174	info = ParseInfo(samplesinfo,path)
175
176	if opts.task == 'plot':
177	repDict['queue'] = 'all.q'
178	for item in Plot_vars:
179	submit(item,repDict)
180
181	if opts.task == 'trainReg':
182	repDict['queue'] = 'all.q'
183	submit('trainReg',repDict)
184
185
186	elif opts.task == 'dc':
187	repDict['queue'] = 'short.q'
188	for item in DC_vars:
189	if 'ZH%s'%opts.mass in item:
190	submit(item,repDict)
191	elif 'ZH' in item and opts.mass == 'all':
192	submit(item,repDict)
193
194	elif opts.task == 'prep':
195	if ( opts.samples == ""):
196	path = config.get("Directories","PREPin")
197	samplesinfo = config.get("Directories","samplesinfo")
198	info = ParseInfo(samplesinfo,path)
199	for job in info:
200	submit(job.name,repDict)
201
202	else:
203	for sample in samplesList:
204	submit(sample,repDict)
205	elif opts.task == 'sys' or opts.task == 'syseval':
206	path = config.get("Directories","SYSin")
207	samplesinfo = config.get("Directories","samplesinfo")
208	info = ParseInfo(samplesinfo,path)
209	if ( opts.samples == ""):
210	for job in info:
211	if (job.subsample):
212	continue #avoid multiple submissions form subsamples
213	# TO FIX FOR SPLITTED SAMPLE
214	submit(job.name,repDict)
215	else:
216	for sample in samplesList:
217	submit(sample,repDict)
218
219	elif opts.task == 'eval':
220	path = config.get("Directories","MVAin")
221	samplesinfo = config.get("Directories","samplesinfo")
222	info = ParseInfo(samplesinfo,path)
223	if ( opts.samples == ""):
224	for job in info:
225	if (job.subsample):
226	continue #avoid multiple submissions from subsamples
227	if(info.checkSplittedSampleName(job.identifier)): # if multiple entries for one name (splitted samples) use the identifier to submit
228	print '@INFO: Splitted samples: submit through identifier'
229	submit(job.identifier,repDict)
230	else: submit(job.name,repDict)
231	else:
232	for sample in samplesList:
233	submit(sample,repDict)
234
235
236	elif( opts.task == 'split' ):
237	path = config.get("Directories","SPLITin")
238	samplesinfo = config.get("Directories","samplesinfo")
239	repDict['job_id']=opts.nevents_split
240	info = ParseInfo(samplesinfo,path)
241	if ( opts.samples == "" ):
242	for job in info:
243	if (job.subsample): continue #avoid multiple submissions from subsamples
244	submit(job.name,repDict)
245	else:
246	for sample in samplesList:
247	submit(sample,repDict)
248
249	#BDT optimisation
250	elif opts.task == 'mva_opt':
251	total_number_of_steps=1
252	setting = ''
253	for par in (config.get('Optimisation','parameters').split(',')):
254	scan_par=eval(config.get('Optimisation',par))
255	setting+=par+'='+str(scan_par[0])+':'
256	if len(scan_par) > 1 and scan_par[2] != 0:
257	total_number_of_steps+=scan_par[2]
258	setting=setting[:-1] # eliminate last column at the end of the setting string
259	print setting
260	repDict['additional']=setting
261	repDict['job_id']=config.get('Optimisation','training')
262	submit('OPT_main_set',repDict)
263	main_setting=setting
264
265	#Scanning all the parameters found in the training config in the Optimisation sector
266	for par in (config.get('Optimisation','parameters').split(',')):
267	scan_par=eval(config.get('Optimisation',par))
268	print par
269	if len(scan_par) > 1 and scan_par[2] != 0:
270	for step in range(scan_par[2]):
271	value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
272	print value
273	setting=re.sub(par+'.*?:',par+'='+str(value)+':',main_setting)
274	repDict['additional']=setting
275	# repDict['job_id']=config.get('Optimisation','training')
276	submit('OPT_'+par+str(value),repDict)
277	# submit(config.get('Optimisation','training'),repDict)
278	print setting
279
280
281	os.system('qstat')
282	if (opts.philipp_love_progress_bars):
283	os.system('./qstat.py')