# VHbb/python/submitThem.py

#! /usr/bin/env python
from optparse import OptionParser
import sys
import time
import os
import shutil

# Command line interface of the submission script.
parser = OptionParser()
parser.add_option("-T", "--tag", dest="tag", default="",
                      help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
parser.add_option("-J", "--task", dest="task", default="",
                      help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
parser.add_option("-M", "--mass", dest="mass", default="125",
                      help="Mass for DC or Plots, 110...135")
parser.add_option("-S","--samples",dest="samples",default="",
                      help="samples you want to run on")
parser.add_option("-F", "--folderTag", dest="ftag", default="",
                      help="Creats a new folder structure for outputs or uses an existing one with the given name")
# The default is a string on purpose: values given on the command line arrive
# as strings as well, and the value is pasted verbatim into the qsub command.
parser.add_option("-N", "--number-of-events", dest="nevents_split", default="100000",
                      help="Number of events per file when splitting.")
# A real flag: "-P" alone switches it on.  A value given after it (the old
# calling convention) simply ends up in the unused positional arguments.
parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", action="store_true", default=False,
                      help="If you share the love of Philipp...")

# sys.argv is passed as a whole, so the script name is the first entry of args.
(opts, args) = parser.parse_args(sys.argv)

import os,shutil,pickle,subprocess,ROOT,re
ROOT.gROOT.SetBatch(True)
from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
import getpass

# Both the tag and the task are mandatory; leave with status 123 otherwise.
if not opts.tag:
    print("Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis.")
    sys.exit(123)

if not opts.task:
    print("Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards")
    sys.exit(123)


# short alias of the tag, used as prefix of the config directory
en = opts.tag

# samples requested with -S (a list holding one empty string when -S is absent)
samplesList = opts.samples.split(",")

# time stamp without blanks and colons, used in the names of the log files
timestamp = time.asctime()
timestamp = timestamp.replace(' ', '_')
timestamp = timestamp.replace(':', '-')

# The list of config files is taken from the path config of the chosen tag.
pathsFile = '%sconfig/paths' % en
pathconfig = BetterConfigParser()
pathconfig.read(pathsFile)
_configs = pathconfig.get('Configuration','List').split(" ")
configs = [ '%sconfig/'%(en) + c for c in _configs  ]

# With a folder tag (-F) the outputs go to <tagDir>/<ftag>/{Logs,Plots,Limits}
# and the config files in use are archived in <tagDir>/<ftag>/config.
if opts.ftag != '':
    tagDir = pathconfig.get('Directories','tagDir')
    DirStruct = {
        'tagDir': tagDir,
        'ftagdir': '%s/%s/' % (tagDir, opts.ftag),
        'logpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Logs'),
        'plotpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Plots'),
        'limitpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'Limits'),
        'confpath': '%s/%s/%s/' % (tagDir, opts.ftag, 'config'),
    }

    # Parents first, so that every os.mkdir finds its parent directory.
    for key in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
        if not os.path.exists(DirStruct[key]):
            os.mkdir(DirStruct[key])

    # Point the output paths in the paths file to the new folders; the
    # previous version of the file is kept as 'paths.bkp'.
    with open(pathsFile) as pathfile:
        pathLines = pathfile.readlines()
    os.rename(pathsFile, '%sconfig/paths.bkp' % en)
    with open(pathsFile, 'w') as pathfile:
        for line in pathLines:
            if line.startswith('plotpath'):
                line = 'plotpath: %s\n'%DirStruct['plotpath']
            elif line.startswith('logpath'):
                line = 'logpath: %s\n'%DirStruct['logpath']
            elif line.startswith('limits'):
                line = 'limits: %s\n'%DirStruct['limitpath']
            pathfile.write(line)

    #copy config files
    for item in configs:
        # Every entry of 'configs' starts with the tag; remove exactly that
        # prefix.  (str.strip(en) would treat 'en' as a set of characters and
        # could also eat characters from the end of the file name.)
        shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item[len(en):]))


print(configs)
config = BetterConfigParser()
config.read(configs)


def compile_macro(config,macro):
    """
    Creates the library from a macro, compiling it in the scratch area of the
    user to avoid problems with the linking in the working nodes.

    Nothing is compiled when the library file already exists.  Otherwise
    '<macro>.h' is copied from the directory of the library to
    '/scratch/<user>/', loaded there with ROOT's '.L <file>+' command and the
    resulting library is copied back to the configured location.  The working
    directory of the caller is restored in every case, also when one of the
    steps raises.

    Args:
        config: configuration file where the macro path is specified
                (option 'library' in the section named like the macro)
        macro: macro name to be compiled
    Returns:
        nothing
    """
    submitDir = os.getcwd()
    _macro = macro + '.h'
    library = config.get(macro,'library')
    libDir = os.path.dirname(library)
    try:
        os.chdir(libDir)
        if not os.path.exists(library):
            print('@INFO: Compiling ' + _macro)
            scratchDir = '/scratch/%s/' % (getpass.getuser())
            shutil.copyfile(libDir + '/' + _macro, scratchDir + _macro)
            os.chdir(scratchDir)
            ROOT.gROOT.ProcessLine('.L %s+' % (scratchDir + _macro))
            shutil.copyfile(scratchDir + os.path.basename(library), library)
    finally:
        # never leave the process in the library or scratch directory
        os.chdir(submitDir)
        
# Make sure the two helper libraries exist before any job is submitted.
compile_macro(config,'BTagReshaping')
compile_macro(config,'VHbbNameSpace')

logPath = config.get("Directories","logpath")
# Lines of data/submit.txt; they serve as job names when -P is given.
with open('%s/data/submit.txt' %config.get('Directories','vhbbpath')) as logoFile:
    logo = logoFile.readlines()
# number of jobs submitted so far
counter = 0

#check if the logPath exist. If not exit
if not os.path.isdir(logPath):
    print('@ERROR : ' + logPath + ': dir not found.')
    print('@ERROR : Create it before submitting ')
    print('Exit')
    sys.exit(-1)


# Values substituted into the qsub command line; 'job' and 'name' are filled
# in per job, 'job_id' and 'additional' are extra arguments passed to runAll.sh.
repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
def submit(job,repDict):
    """
    Submits one job to the batch system with qsub.

    The entries 'job' and 'name' of repDict are overwritten, the qsub command
    line is built from repDict, printed and executed through the shell.  The
    global job counter is increased by one for every call.

    Args:
        job: name of the job, passed as first argument to runAll.sh
        repDict: dictionary with the keys 'queue', 'logpath', 'timestamp',
                 'en', 'task', 'job_id' and 'additional'
    Returns:
        nothing
    """
    global counter
    repDict['job'] = job
    if opts.philipp_love_progress_bars and logo:
        # cycle through the lines of the logo file, one line per job
        repDict['name'] = '"%s"' % logo[counter % len(logo)].strip()
    else:
        # default name; also used when the logo file is empty
        repDict['name'] = '%(job)s_%(en)s%(task)s' % repDict
    counter += 1
    command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' % repDict
    # job_id may be a number (e.g. events per file for the split task)
    command += opts.task + ' ' + str(repDict['job_id']) + ' ' + repDict['additional']
    print(command)
    # with shell=True the command has to be given as one string
    subprocess.call(command, shell=True)

# Training: one job for every entry of the list given in the MVALists section.
if opts.task == 'train':
    train_list = config.get('MVALists','List_for_submitscript').split(',')
    print(train_list)
    for item in train_list:
        submit(item,repDict)


# Datacards: read the list of items that can be submitted.
if opts.task == 'dc':
    DC_vars = config.get('LimitGeneral','List').split(',')
    print(DC_vars)

# Plots: read the list of items that can be submitted.
if opts.task == 'plot':
    Plot_vars = config.get('Plot_general','List').split(',')

# Every task but 'prep' starts from the sample information in samplepath.
if opts.task != 'prep':
    path = config.get("Directories","samplepath")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)

# Submit the jobs of the requested task.
if opts.task == 'plot':
    repDict['queue'] = 'all.q'
    for item in Plot_vars:
        submit(item,repDict)


elif opts.task == 'dc':
    repDict['queue'] = 'short.q'
    for item in DC_vars:
        # the requested mass point, or every ZH item for "-M all"
        if ('ZH%s' % opts.mass) in item or ('ZH' in item and opts.mass == 'all'):
            submit(item,repDict)

elif opts.task == 'prep':
    if opts.samples == "":
        path = config.get("Directories","PREPin")
        samplesinfo = config.get("Directories","samplesinfo")
        info = ParseInfo(samplesinfo,path)
        for job in info:
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'sys' or opts.task == 'syseval':
    path = config.get("Directories","SYSin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            # TO FIX FOR SPLITTED SAMPLE
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'eval':
    path = config.get("Directories","MVAin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            if info.checkSplittedSampleName(job.identifier):
                # if multiple entries for one name (splitted samples) use the identifier to submit
                print('@INFO: Splitted samples: submit through identifier')
                submit(job.identifier,repDict)
            else:
                submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)


elif opts.task == 'split':
    path = config.get("Directories","SPLITin")
    samplesinfo = config.get("Directories","samplesinfo")
    # number of events per file; always handed over as text because it is
    # pasted into the command line
    repDict['job_id'] = str(opts.nevents_split)
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

#BDT optimisation
elif opts.task == 'mva_opt':
    # Every parameter named in [Optimisation] parameters has an option of the
    # same name holding a list: the first entry is the main value, a list of
    # the form [start, stop, steps] additionally defines a scan.
    # NOTE(review): the lists are parsed with eval(); this is only acceptable
    # as long as the config files are trusted.
    scans = []
    for par in config.get('Optimisation','parameters').split(','):
        scans.append((par, eval(config.get('Optimisation',par))))

    # main setting: all parameters at their first value, "a=1:b=2:..."
    main_setting = ':'.join(name + '=' + str(scan[0]) for name, scan in scans)
    setting = main_setting
    print(setting)
    repDict['additional'] = setting
    repDict['job_id'] = config.get('Optimisation','training')
    submit('OPT_main_set',repDict)

    #Scanning all the parameters found in the training config in the Optimisation sector
    for position, (par, scan_par) in enumerate(scans):
        print(par)
        # no scan for parameters without a step count or with zero steps
        if len(scan_par) <= 2 or scan_par[2] == 0:
            continue
        for step in range(scan_par[2]):
            # NOTE(review): under Python 2 this is an integer division when
            # all three entries are integers - confirm that this is intended.
            value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
            print(value)
            # Rebuild the setting with only the scanned parameter changed.
            # The setting is built field by field instead of with a regular
            # expression: the last field has no trailing ':' and parameter
            # names may be contained in each other.
            setting = ':'.join(
                name + '=' + str(value if index == position else scan[0])
                for index, (name, scan) in enumerate(scans))
            repDict['additional'] = setting
            submit('OPT_'+par+str(value),repDict)
            print(setting)


# Show the state of the batch queue after the submission ...
os.system('qstat')
# ... and additionally run the local ./qstat.py script when -P was given.
if opts.philipp_love_progress_bars:
    os.system('./qstat.py')
Revision:	1.35
Committed:	Thu Mar 21 14:18:17 2013 UTC (12 years, 1 month ago) by bortigno
Content type:	text/x-python
Branch:	MAIN
Changes since 1.34:	+33 -13 lines
Log Message:	@FIX + ADD: list of the config from the path config. (not hardcoded anymore). Add function to compile macro from scratch (as meister suggests) + copy back.
#	User	Rev	Content
1	nmohr	1.1	#! /usr/bin/env python
2	nmohr	1.6	from optparse import OptionParser
3	bortigno	1.26	import sys
4	peller	1.28	import time
5			import os
6			import shutil
7	nmohr	1.7
8	nmohr	1.6	parser = OptionParser()
9			parser.add_option("-T", "--tag", dest="tag", default="",
10			help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
11	peller	1.9	parser.add_option("-J", "--task", dest="task", default="",
12	nmohr	1.19	help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
13	peller	1.9	parser.add_option("-M", "--mass", dest="mass", default="125",
14	bortigno	1.10	help="Mass for DC or Plots, 110...135")
15			parser.add_option("-S","--samples",dest="samples",default="",
16			help="samples you want to run on")
17	peller	1.28	parser.add_option("-F", "--folderTag", dest="ftag", default="",
18			help="Creats a new folder structure for outputs or uses an existing one with the given name")
19	bortigno	1.30	parser.add_option("-N", "--number-of-events", dest="nevents_split", default=100000,
20			help="Number of events per file when splitting.")
21			parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
22			help="If you share the love of Philipp...")
23
24	bortigno	1.26	(opts, args) = parser.parse_args(sys.argv)
25
26	bortigno	1.30	import os,shutil,pickle,subprocess,ROOT,re
27	nmohr	1.27	ROOT.gROOT.SetBatch(True)
28	bortigno	1.31	from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
29	bortigno	1.26	import getpass
30	nmohr	1.6
31			if opts.tag == "":
32			print "Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis."
33			sys.exit(123)
34	peller	1.9
35			if opts.task == "":
36			print "Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards"
37			sys.exit(123)
38
39	peller	1.28
40			en = opts.tag
41
42	bortigno	1.11	#create the list with the samples to run over
43			samplesList=opts.samples.split(",")
44	peller	1.28	timestamp = time.asctime().replace(' ','_').replace(':','-')
45
46	bortigno	1.35	# the list of the config is taken from the path config
47	peller	1.28	pathconfig = BetterConfigParser()
48			pathconfig.read('%sconfig/paths'%(en))
49	bortigno	1.35	_configs = pathconfig.get('Configuration','List').split(" ")
50			configs = [ '%sconfig/'%(en) + c for c in _configs ]
51	peller	1.28
52			if not opts.ftag == '':
53			tagDir = pathconfig.get('Directories','tagDir')
54			DirStruct={'tagDir':tagDir,'ftagdir':'%s/%s/'%(tagDir,opts.ftag),'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config') }
55
56			for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
57			try:
58			os.stat(DirStruct[keys])
59			except:
60			os.mkdir(DirStruct[keys])
61
62			pathfile = open('%sconfig/paths'%(en))
63			buffer = pathfile.readlines()
64			pathfile.close()
65			os.rename('%sconfig/paths'%(en),'%sconfig/paths.bkp'%(en))
66			pathfile = open('%sconfig/paths'%(en),'w')
67			for line in buffer:
68			if line.startswith('plotpath'):
69			line = 'plotpath: %s\n'%DirStruct['plotpath']
70			elif line.startswith('logpath'):
71			line = 'logpath: %s\n'%DirStruct['logpath']
72			elif line.startswith('limits'):
73			line = 'limits: %s\n'%DirStruct['limitpath']
74			pathfile.write(line)
75			pathfile.close()
76
77			#copy config files
78			for item in configs:
79			shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item.strip(en)))
80
81
82	nmohr	1.6	print configs
83			config = BetterConfigParser()
84			config.read(configs)
85	peller	1.28
86	bortigno	1.35
87			def compile_macro(config,macro):
88			"""
89			Creates the library from a macro using CINT compiling it in scratch to avoid
90			problems with the linking in the working nodes.
91			Args:
92			config: configuration file where the macro path is specified
93			macro: macro name to be compiled
94			Returns:
95			nothing
96			"""
97			submitDir = os.getcwd()
98			_macro=macro+'.h'
99			library = config.get(macro,'library')
100			libDir=os.path.dirname(library)
101			os.chdir(libDir)
102			if not os.path.exists(library):
103			print '@INFO: Compiling ' + _macro
104			scratchDir='/scratch/%s/'%(getpass.getuser())
105			shutil.copyfile(libDir+'/'+_macro,'/scratch/%s/%s'%(getpass.getuser(),_macro))
106			os.chdir(scratchDir)
107			ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
108			shutil.copyfile('/scratch/%s/%s'%(getpass.getuser(),os.path.basename(library)),library)
109			os.chdir(submitDir)
110
111			compile_macro(config,'BTagReshaping')
112			compile_macro(config,'VHbbNameSpace')
113
114	nmohr	1.6	logPath = config.get("Directories","logpath")
115	nmohr	1.34	logo = open('%s/data/submit.txt' %config.get('Directories','vhbbpath')).readlines()
116			counter = 0
117
118	bortigno	1.14	#check if the logPath exist. If not exit
119			if( not os.path.isdir(logPath) ):
120	bortigno	1.35	print '@ERROR : ' + logPath + ': dir not found.'
121			print '@ERROR : Create it before submitting '
122	nmohr	1.34	print 'Exit'
123			sys.exit(-1)
124
125	bortigno	1.14
126	bortigno	1.30	repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
127	nmohr	1.6	def submit(job,repDict):
128	nmohr	1.34	global counter
129			repDict['job'] = job
130			nJob = counter % len(logo)
131			counter += 1
132			if opts.philipp_love_progress_bars:
133			repDict['name'] = '"%s"' %logo[nJob].strip()
134			else:
135			repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
136			command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + repDict['job_id'] + ' ' + repDict['additional']
137			print command
138			subprocess.call([command], shell=True)
139	nmohr	1.1
140	peller	1.29	if opts.task == 'train':
141			train_list = (config.get('MVALists','List_for_submitscript')).split(',')
142			print train_list
143			for item in train_list:
144			submit(item,repDict)
145
146
147	peller	1.9	if opts.task == 'dc':
148	peller	1.18	#DC_vars = config.items('Limit')
149			DC_vars= (config.get('LimitGeneral','List')).split(',')
150			print DC_vars
151
152	peller	1.9	if opts.task == 'plot':
153	peller	1.15	Plot_vars= (config.get('Plot_general','List')).split(',')
154	peller	1.8
155	peller	1.12	if not opts.task == 'prep':
156			path = config.get("Directories","samplepath")
157	nmohr	1.21	samplesinfo = config.get("Directories","samplesinfo")
158	nmohr	1.27	info = ParseInfo(samplesinfo,path)
159	peller	1.8
160	peller	1.9	if opts.task == 'plot':
161	peller	1.23	repDict['queue'] = 'all.q'
162	peller	1.9	for item in Plot_vars:
163	peller	1.15	submit(item,repDict)
164	peller	1.9
165	bortigno	1.30
166	peller	1.9	elif opts.task == 'dc':
167	bortigno	1.32	repDict['queue'] = 'short.q'
168	peller	1.9	for item in DC_vars:
169	peller	1.22	if 'ZH%s'%opts.mass in item:
170	peller	1.18	submit(item,repDict)
171	peller	1.22	elif 'ZH' in item and opts.mass == 'all':
172	peller	1.18	submit(item,repDict)
173	peller	1.17
174	peller	1.9	elif opts.task == 'prep':
175	nmohr	1.33	if ( opts.samples == ""):
176			path = config.get("Directories","PREPin")
177			samplesinfo = config.get("Directories","samplesinfo")
178			info = ParseInfo(samplesinfo,path)
179			for job in info:
180			submit(job.name,repDict)
181	peller	1.9
182	nmohr	1.33	else:
183			for sample in samplesList:
184			submit(sample,repDict)
185	bortigno	1.30	elif opts.task == 'sys' or opts.task == 'syseval':
186			path = config.get("Directories","SYSin")
187			samplesinfo = config.get("Directories","samplesinfo")
188			info = ParseInfo(samplesinfo,path)
189			if ( opts.samples == ""):
190			for job in info:
191	nmohr	1.34	if (job.subsample):
192			continue #avoid multiple submissions form subsamples
193			# TO FIX FOR SPLITTED SAMPLE
194	bortigno	1.30	submit(job.name,repDict)
195			else:
196			for sample in samplesList:
197			submit(sample,repDict)
198
199			elif opts.task == 'eval':
200			path = config.get("Directories","MVAin")
201			samplesinfo = config.get("Directories","samplesinfo")
202			info = ParseInfo(samplesinfo,path)
203	bortigno	1.10	if ( opts.samples == ""):
204			for job in info:
205	nmohr	1.34	if (job.subsample):
206			continue #avoid multiple submissions from subsamples
207			if(info.checkSplittedSampleName(job.identifier)): # if multiple entries for one name (splitted samples) use the identifier to submit
208			print '@INFO: Splitted samples: submit through identifier'
209			submit(job.identifier,repDict)
210			else: submit(job.name,repDict)
211	bortigno	1.10	else:
212	bortigno	1.11	for sample in samplesList:
213	bortigno	1.10	submit(sample,repDict)
214	peller	1.20
215	bortigno	1.30
216			elif( opts.task == 'split' ):
217			path = config.get("Directories","SPLITin")
218			samplesinfo = config.get("Directories","samplesinfo")
219	bortigno	1.32	repDict['job_id']=opts.nevents_split
220	bortigno	1.30	info = ParseInfo(samplesinfo,path)
221	bortigno	1.32	if ( opts.samples == "" ):
222	bortigno	1.30	for job in info:
223	bortigno	1.32	if (job.subsample): continue #avoid multiple submissions from subsamples
224	bortigno	1.30	submit(job.name,repDict)
225			else:
226			for sample in samplesList:
227			submit(sample,repDict)
228
229			#BDT optimisation
230			elif opts.task == 'mva_opt':
231			total_number_of_steps=1
232			setting = ''
233			for par in (config.get('Optimisation','parameters').split(',')):
234			scan_par=eval(config.get('Optimisation',par))
235			setting+=par+'='+str(scan_par[0])+':'
236			if len(scan_par) > 1 and scan_par[2] != 0:
237			total_number_of_steps+=scan_par[2]
238			setting=setting[:-1] # eliminate last column at the end of the setting string
239			print setting
240			repDict['additional']=setting
241			repDict['job_id']=config.get('Optimisation','training')
242			submit('OPT_main_set',repDict)
243			main_setting=setting
244
245			#Scanning all the parameters found in the training config in the Optimisation sector
246			for par in (config.get('Optimisation','parameters').split(',')):
247			scan_par=eval(config.get('Optimisation',par))
248			print par
249			if len(scan_par) > 1 and scan_par[2] != 0:
250			for step in range(scan_par[2]):
251			value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
252			print value
253			setting=re.sub(par+'.*?:',par+'='+str(value)+':',main_setting)
254			repDict['additional']=setting
255			# repDict['job_id']=config.get('Optimisation','training')
256			submit('OPT_'+par+str(value),repDict)
257			# submit(config.get('Optimisation','training'),repDict)
258			print setting
259
260
261			os.system('qstat')
262			if (opts.philipp_love_progress_bars):
263			os.system('./qstat.py')