VHbb/python/submitThem.py

#! /usr/bin/env python
from optparse import OptionParser
import sys
import time
import os
import shutil

# Command-line interface of the submission script.
# No option declares a `type`, so every value given on the command line is
# stored as a string.
parser = OptionParser()
parser.add_option("-T", "--tag", dest="tag", default="",
                      help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
parser.add_option("-J", "--task", dest="task", default="",
                      help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
parser.add_option("-M", "--mass", dest="mass", default="125",
                      help="Mass for DC or Plots, 110...135")
parser.add_option("-S","--samples",dest="samples",default="",
                      help="samples you want to run on")
parser.add_option("-F", "--folderTag", dest="ftag", default="",
                      help="Creats a new folder structure for outputs or uses an existing one with the given name")
# The default is a string on purpose: the value is pasted into the qsub
# command line, and a value passed with -N arrives as a string as well.
# An int default made "-J split" without -N fail on str + int concatenation.
parser.add_option("-N", "--number-of-events", dest="nevents_split", default="100000",
                      help="Number of events per file when splitting.")
# NOTE(review): this option has no action="store_true", so -P consumes the
# next argument and any non-empty value (even "False") switches it on.
parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
                      help="If you share the love of Philipp...")

# The full sys.argv is parsed, so the script name ends up in `args`.
(opts, args) = parser.parse_args(sys.argv)

import os,shutil,pickle,subprocess,ROOT,re
ROOT.gROOT.SetBatch(True)
from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
import getpass

# Both a tag and a task are mandatory.
# NOTE(review): the messages mention config8TeV/pathConfig8TeV, but the code
# below reads '<tag>config/paths' (e.g. '8TeVconfig/paths').
if opts.tag == "":
    print("Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis.")
    sys.exit(123)

if opts.task == "":
    print("Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards")
    sys.exit(123)


en = opts.tag

#create the list with the samples to run over
samplesList=opts.samples.split(",")
# timestamp without blanks and colons, used in the names of the log files
timestamp = time.asctime().replace(' ','_').replace(':','-')

# the list of the config is taken from the path config
pathsFile = '%sconfig/paths'%(en)
pathconfig = BetterConfigParser()
pathconfig.read(pathsFile)
# split() without argument tolerates repeated blanks and line breaks in the
# list; split(" ") produced empty entries that pointed at the bare directory.
_configs = pathconfig.get('Configuration','List').split()
configs = [ '%sconfig/'%(en) + c for c in _configs  ]

if not opts.ftag == '':
    # A folder tag was given: create <tagDir>/<ftag>/{Logs,Plots,Limits,config}
    # and redirect the output paths of the path config into it.
    tagDir = pathconfig.get('Directories','tagDir')
    ftagDir = '%s/%s/'%(tagDir,opts.ftag)
    DirStruct={'tagDir':tagDir,
               'ftagdir':ftagDir,
               'logpath':ftagDir+'Logs/',
               'plotpath':ftagDir+'Plots/',
               'limitpath':ftagDir+'Limits/',
               'confpath':ftagDir+'config/'}

    # parents first, so that os.mkdir never meets a missing parent inside tagDir
    for key in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
        if not os.path.exists(DirStruct[key]):
            os.mkdir(DirStruct[key])

    # Rewrite the path config in place; the previous version is kept as
    # 'paths.bkp'. This happens before config.read(configs) below, so the new
    # values are the ones used if 'paths' is part of the configuration list.
    with open(pathsFile) as pathfile:
        pathLines = pathfile.readlines()
    os.rename(pathsFile,pathsFile+'.bkp')
    with open(pathsFile,'w') as pathfile:
        for line in pathLines:
            if line.startswith('plotpath'):
                line = 'plotpath: %s\n'%DirStruct['plotpath']
            elif line.startswith('logpath'):
                line = 'logpath: %s\n'%DirStruct['logpath']
            elif line.startswith('limits'):
                line = 'limits: %s\n'%DirStruct['limitpath']
            pathfile.write(line)

    #copy config files
    for item in configs:
        # Drop the leading tag to get 'config/<file>'. str.strip(en) would
        # remove any of the tag's characters from BOTH ends of the path and
        # could therefore mangle the file name.
        shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item[len(en):]))


print(configs)
config = BetterConfigParser()
config.read(configs)


def compile_macro(config,macro):
    """
    Creates the library from a macro using CINT compiling it in scratch to avoid
    problems with the linking in the working nodes.

    Nothing is compiled when the library file already exists. Otherwise
    '<macro>.h' is copied from the library directory to /scratch/<user>/,
    loaded there with ROOT's '.L <file>+' and the resulting library is copied
    back to the configured location. The working directory of the process is
    restored in every case, also when copying or compiling fails.

    Args:
        config: configuration object; config.get(macro,'library') gives the
                path of the library
        macro: macro name to be compiled (name of the config section and of
               the header without '.h')
    Returns:
        nothing
    """
    submitDir = os.getcwd()
    _macro=macro+'.h'
    library = config.get(macro,'library')
    libDir=os.path.dirname(library)
    try:
        os.chdir(libDir)
        if not os.path.exists(library):
            print('@INFO: Compiling ' + _macro)
            scratchDir='/scratch/%s/'%(getpass.getuser())
            shutil.copyfile(libDir+'/'+_macro,scratchDir+_macro)
            # compile from inside scratch so the build products land there
            os.chdir(scratchDir)
            ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
            shutil.copyfile(scratchDir+os.path.basename(library),library)
    finally:
        # always go back, otherwise later relative paths (config files,
        # runAll.sh) would be resolved against the wrong directory
        os.chdir(submitDir)
        
# Make sure both ROOT helper libraries exist before any job is submitted.
for _macroName in ('BTagReshaping', 'VHbbNameSpace'):
    compile_macro(config, _macroName)

logPath = config.get("Directories", "logpath")
# Lines of the "logo" file; submit() uses them as job names when -P is set.
logo = open('%s/data/submit.txt' % config.get('Directories', 'vhbbpath')).readlines()
# Number of jobs submitted so far (updated by submit()).
counter = 0

# The log directory is not created here: stop if it does not exist.
if not os.path.isdir(logPath):
    print('@ERROR : %s: dir not found.' % logPath)
    print('@ERROR : Create it before submitting ')
    print('Exit')
    sys.exit(-1)


# Replacement dictionary used by submit() to build the qsub command.
repDict = {
    'en': en,
    'logpath': logPath,
    'job': '',
    'task': opts.task,
    'queue': 'all.q',
    'timestamp': timestamp,
    'additional': '',
    'job_id': '',
}
def submit(job,repDict):
    """
    Submits one job to the batch system with qsub.

    The qsub command runs 'runAll.sh <job> <en> <task> <job_id> <additional>'
    and writes stdout/stderr to '<logpath>/<timestamp>_<job>_<en>_<task>.out'
    and '.err'. The command is printed and then executed through the shell.

    Side effects: repDict['job'] and repDict['name'] are overwritten and the
    module-level `counter` is incremented on every call.

    NOTE(review): the command is one shell string and nothing in it is
    quoted, so job names and settings must not contain shell metacharacters.

    Args:
        job: name of the job (sample, region, parameter point, ...)
        repDict: replacement dictionary with the keys 'en', 'logpath',
                 'task', 'queue', 'timestamp', 'job_id' and 'additional';
                 it is modified in place
    Returns:
        nothing
    """
    global counter
    repDict['job'] = job
    jobIndex = counter
    counter += 1
    # The logo is only indexed when it is really used and not empty; taking
    # the modulo unconditionally crashed on an empty logo file.
    if opts.philipp_love_progress_bars and logo:
        # cycle through the lines of the logo file, one line per job name
        repDict['name'] = '"%s"' %logo[jobIndex % len(logo)].strip()
    else:
        repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
    # str(): job_id may be a number (e.g. the number of events per file)
    command = ('qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict)
               + opts.task + ' ' + str(repDict['job_id']) + ' ' + repDict['additional'])
    print(command)
    subprocess.call(command, shell=True)

# Tasks whose job list comes straight from the configuration.
if opts.task == 'train':
    train_list = config.get('MVALists', 'List_for_submitscript').split(',')
    print(train_list)
    for trainJob in train_list:
        submit(trainJob, repDict)
elif opts.task == 'dc':
    # the datacard jobs are only collected here and submitted further down
    DC_vars = config.get('LimitGeneral', 'List').split(',')
    print(DC_vars)
elif opts.task == 'plot':
    Plot_vars = config.get('Plot_general', 'List').split(',')

# Every task except 'prep' parses the sample information for 'samplepath'.
if opts.task != 'prep':
    path = config.get("Directories", "samplepath")
    samplesinfo = config.get("Directories", "samplesinfo")
    info = ParseInfo(samplesinfo, path)

# One job per entry of the plot list.
if opts.task == 'plot':
    repDict['queue'] = 'all.q'
    for plotJob in Plot_vars:
        submit(plotJob, repDict)

# Dispatch of the remaining tasks. For the sample based tasks (prep, sys,
# syseval, eval, split) one job is submitted per sample: either for every
# sample found by ParseInfo or only for the ones given with -S.
if opts.task == 'trainReg':
    repDict['queue'] = 'all.q'
    submit('trainReg',repDict)

elif opts.task == 'dc':
    repDict['queue'] = 'short.q'
    for item in DC_vars:
        # only the datacards of the requested mass point, or all ZH ones
        if 'ZH%s'%opts.mass in item or ('ZH' in item and opts.mass == 'all'):
            submit(item,repDict)

elif opts.task == 'prep':
    if opts.samples == "":
        path = config.get("Directories","PREPin")
        samplesinfo = config.get("Directories","samplesinfo")
        info = ParseInfo(samplesinfo,path)
        for job in info:
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'sys' or opts.task == 'syseval':
    path = config.get("Directories","SYSin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            # TO FIX FOR SPLITTED SAMPLE
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'eval':
    path = config.get("Directories","MVAin")
    samplesinfo = config.get("Directories","samplesinfo")
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            if info.checkSplittedSampleName(job.identifier):
                # multiple entries for one name (splitted samples): use the identifier to submit
                print('@INFO: Splitted samples: submit through identifier')
                submit(job.identifier,repDict)
            else:
                submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

elif opts.task == 'split':
    path = config.get("Directories","SPLITin")
    samplesinfo = config.get("Directories","samplesinfo")
    # the number of events per file is handed to runAll.sh as the job id;
    # str() because the value ends up in the command line string
    repDict['job_id'] = str(opts.nevents_split)
    info = ParseInfo(samplesinfo,path)
    if opts.samples == "":
        for job in info:
            if job.subsample:
                continue #avoid multiple submissions from subsamples
            submit(job.name,repDict)
    else:
        for sample in samplesList:
            submit(sample,repDict)

#BDT optimisation
elif opts.task == 'mva_opt':
    # Every parameter listed in [Optimisation] 'parameters' has an option of
    # the same name that is used below as (start, end, number of steps);
    # entries with less than three elements or zero steps are not scanned.
    # NOTE(review): eval() executes the configuration text as Python code,
    # which is only acceptable as long as the config files are trusted.
    scans = []
    for par in config.get('Optimisation','parameters').split(','):
        scans.append((par, eval(config.get('Optimisation',par))))

    # main setting: every parameter at its start value, 'a=1:b=2:...'
    main_setting = ':'.join(['%s=%s'%(scanName,scan[0]) for scanName,scan in scans])
    print(main_setting)
    repDict['additional'] = main_setting
    repDict['job_id'] = config.get('Optimisation','training')
    submit('OPT_main_set',repDict)

    #Scanning all the parameters found in the training config in the Optimisation sector
    for par,scan_par in scans:
        print(par)
        # '> 2' because scan_par[2] is read; '> 1' raised IndexError for
        # two-element entries
        if len(scan_par) > 2 and scan_par[2] != 0:
            for step in range(scan_par[2]):
                # NOTE(review): with integer entries this is an integer
                # division under Python 2
                value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
                print(value)
                # Rebuild the setting with only this parameter changed. The
                # former regex replacement needed a ':' after the value and so
                # never changed the last parameter of the list.
                parts = []
                for scanName,scan in scans:
                    if scanName == par:
                        parts.append('%s=%s'%(scanName,value))
                    else:
                        parts.append('%s=%s'%(scanName,scan[0]))
                setting = ':'.join(parts)
                repDict['additional'] = setting
                submit('OPT_'+par+str(value),repDict)
                print(setting)


# show the state of the queue after the submission
os.system('qstat')
if opts.philipp_love_progress_bars:
    os.system('./qstat.py')
Revision:	1.36
Committed:	Fri Mar 22 15:19:28 2013 UTC (12 years, 1 month ago) by nmohr
Content type:	text/x-python
Branch:	MAIN
Changes since 1.35:	+4 -0 lines
Log Message:	Add regression submission
#	User	Rev	Content
1	nmohr	1.1	#! /usr/bin/env python
2	nmohr	1.6	from optparse import OptionParser
3	bortigno	1.26	import sys
4	peller	1.28	import time
5			import os
6			import shutil
7	nmohr	1.7
8	nmohr	1.6	parser = OptionParser()
9			parser.add_option("-T", "--tag", dest="tag", default="",
10			help="Tag to run the analysis with, example '8TeV' uses config8TeV and pathConfig8TeV to run the analysis")
11	peller	1.9	parser.add_option("-J", "--task", dest="task", default="",
12	nmohr	1.19	help="Task to be done, i.e. 'dc' for Datacards, 'prep' for preparation of Trees, 'plot' to produce plots or 'eval' to write the MVA output or 'sys' to write regression and systematics (or 'syseval' for both). ")
13	peller	1.9	parser.add_option("-M", "--mass", dest="mass", default="125",
14	bortigno	1.10	help="Mass for DC or Plots, 110...135")
15			parser.add_option("-S","--samples",dest="samples",default="",
16			help="samples you want to run on")
17	peller	1.28	parser.add_option("-F", "--folderTag", dest="ftag", default="",
18			help="Creats a new folder structure for outputs or uses an existing one with the given name")
19	bortigno	1.30	parser.add_option("-N", "--number-of-events", dest="nevents_split", default=100000,
20			help="Number of events per file when splitting.")
21			parser.add_option("-P", "--philipp-love-progress-bars", dest="philipp_love_progress_bars", default=False,
22			help="If you share the love of Philipp...")
23
24	bortigno	1.26	(opts, args) = parser.parse_args(sys.argv)
25
26	bortigno	1.30	import os,shutil,pickle,subprocess,ROOT,re
27	nmohr	1.27	ROOT.gROOT.SetBatch(True)
28	bortigno	1.31	from myutils import BetterConfigParser, Sample, ParseInfo, sample_parser
29	bortigno	1.26	import getpass
30	nmohr	1.6
31			if opts.tag == "":
32			print "Please provide tag to run the analysis with, example '-T 8TeV' uses config8TeV and pathConfig8TeV to run the analysis."
33			sys.exit(123)
34	peller	1.9
35			if opts.task == "":
36			print "Please provide a task.\n-J prep:\tpreparation of Trees\n-J sys:\t\twrite regression and systematics\n-J eval:\tcreate MVA output\n-J plot:\tproduce Plots\n-J dc:\t\twrite workspaces and datacards"
37			sys.exit(123)
38
39	peller	1.28
40			en = opts.tag
41
42	bortigno	1.11	#create the list with the samples to run over
43			samplesList=opts.samples.split(",")
44	peller	1.28	timestamp = time.asctime().replace(' ','_').replace(':','-')
45
46	bortigno	1.35	# the list of the config is taken from the path config
47	peller	1.28	pathconfig = BetterConfigParser()
48			pathconfig.read('%sconfig/paths'%(en))
49	bortigno	1.35	_configs = pathconfig.get('Configuration','List').split(" ")
50			configs = [ '%sconfig/'%(en) + c for c in _configs ]
51	peller	1.28
52			if not opts.ftag == '':
53			tagDir = pathconfig.get('Directories','tagDir')
54			DirStruct={'tagDir':tagDir,'ftagdir':'%s/%s/'%(tagDir,opts.ftag),'logpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Logs'),'plotpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Plots'),'limitpath':'%s/%s/%s/'%(tagDir,opts.ftag,'Limits'),'confpath':'%s/%s/%s/'%(tagDir,opts.ftag,'config') }
55
56			for keys in ['tagDir','ftagdir','logpath','plotpath','limitpath','confpath']:
57			try:
58			os.stat(DirStruct[keys])
59			except:
60			os.mkdir(DirStruct[keys])
61
62			pathfile = open('%sconfig/paths'%(en))
63			buffer = pathfile.readlines()
64			pathfile.close()
65			os.rename('%sconfig/paths'%(en),'%sconfig/paths.bkp'%(en))
66			pathfile = open('%sconfig/paths'%(en),'w')
67			for line in buffer:
68			if line.startswith('plotpath'):
69			line = 'plotpath: %s\n'%DirStruct['plotpath']
70			elif line.startswith('logpath'):
71			line = 'logpath: %s\n'%DirStruct['logpath']
72			elif line.startswith('limits'):
73			line = 'limits: %s\n'%DirStruct['limitpath']
74			pathfile.write(line)
75			pathfile.close()
76
77			#copy config files
78			for item in configs:
79			shutil.copyfile(item,'%s/%s/%s'%(tagDir,opts.ftag,item.strip(en)))
80
81
82	nmohr	1.6	print configs
83			config = BetterConfigParser()
84			config.read(configs)
85	peller	1.28
86	bortigno	1.35
87			def compile_macro(config,macro):
88			"""
89			Creates the library from a macro using CINT compiling it in scratch to avoid
90			problems with the linking in the working nodes.
91			Args:
92			config: configuration file where the macro path is specified
93			macro: macro name to be compiled
94			Returns:
95			nothing
96			"""
97			submitDir = os.getcwd()
98			_macro=macro+'.h'
99			library = config.get(macro,'library')
100			libDir=os.path.dirname(library)
101			os.chdir(libDir)
102			if not os.path.exists(library):
103			print '@INFO: Compiling ' + _macro
104			scratchDir='/scratch/%s/'%(getpass.getuser())
105			shutil.copyfile(libDir+'/'+_macro,'/scratch/%s/%s'%(getpass.getuser(),_macro))
106			os.chdir(scratchDir)
107			ROOT.gROOT.ProcessLine('.L %s+'%(scratchDir+_macro))
108			shutil.copyfile('/scratch/%s/%s'%(getpass.getuser(),os.path.basename(library)),library)
109			os.chdir(submitDir)
110
111			compile_macro(config,'BTagReshaping')
112			compile_macro(config,'VHbbNameSpace')
113
114	nmohr	1.6	logPath = config.get("Directories","logpath")
115	nmohr	1.34	logo = open('%s/data/submit.txt' %config.get('Directories','vhbbpath')).readlines()
116			counter = 0
117
118	bortigno	1.14	#check if the logPath exist. If not exit
119			if( not os.path.isdir(logPath) ):
120	bortigno	1.35	print '@ERROR : ' + logPath + ': dir not found.'
121			print '@ERROR : Create it before submitting '
122	nmohr	1.34	print 'Exit'
123			sys.exit(-1)
124
125	bortigno	1.14
126	bortigno	1.30	repDict = {'en':en,'logpath':logPath,'job':'','task':opts.task,'queue': 'all.q','timestamp':timestamp,'additional':'','job_id':''}
127	nmohr	1.6	def submit(job,repDict):
128	nmohr	1.34	global counter
129			repDict['job'] = job
130			nJob = counter % len(logo)
131			counter += 1
132			if opts.philipp_love_progress_bars:
133			repDict['name'] = '"%s"' %logo[nJob].strip()
134			else:
135			repDict['name'] = '%(job)s_%(en)s%(task)s' %repDict
136			command = 'qsub -V -cwd -q %(queue)s -l h_vmem=6G -N %(name)s -o %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.out -e %(logpath)s/%(timestamp)s_%(job)s_%(en)s_%(task)s.err runAll.sh %(job)s %(en)s ' %(repDict) + opts.task + ' ' + repDict['job_id'] + ' ' + repDict['additional']
137			print command
138			subprocess.call([command], shell=True)
139	nmohr	1.1
140	peller	1.29	if opts.task == 'train':
141			train_list = (config.get('MVALists','List_for_submitscript')).split(',')
142			print train_list
143			for item in train_list:
144			submit(item,repDict)
145
146
147	peller	1.9	if opts.task == 'dc':
148	peller	1.18	#DC_vars = config.items('Limit')
149			DC_vars= (config.get('LimitGeneral','List')).split(',')
150			print DC_vars
151
152	peller	1.9	if opts.task == 'plot':
153	peller	1.15	Plot_vars= (config.get('Plot_general','List')).split(',')
154	peller	1.8
155	peller	1.12	if not opts.task == 'prep':
156			path = config.get("Directories","samplepath")
157	nmohr	1.21	samplesinfo = config.get("Directories","samplesinfo")
158	nmohr	1.27	info = ParseInfo(samplesinfo,path)
159	peller	1.8
160	peller	1.9	if opts.task == 'plot':
161	peller	1.23	repDict['queue'] = 'all.q'
162	peller	1.9	for item in Plot_vars:
163	peller	1.15	submit(item,repDict)
164	peller	1.9
165	nmohr	1.36	if opts.task == 'trainReg':
166			repDict['queue'] = 'all.q'
167			submit('trainReg',repDict)
168
169	bortigno	1.30
170	peller	1.9	elif opts.task == 'dc':
171	bortigno	1.32	repDict['queue'] = 'short.q'
172	peller	1.9	for item in DC_vars:
173	peller	1.22	if 'ZH%s'%opts.mass in item:
174	peller	1.18	submit(item,repDict)
175	peller	1.22	elif 'ZH' in item and opts.mass == 'all':
176	peller	1.18	submit(item,repDict)
177	peller	1.17
178	peller	1.9	elif opts.task == 'prep':
179	nmohr	1.33	if ( opts.samples == ""):
180			path = config.get("Directories","PREPin")
181			samplesinfo = config.get("Directories","samplesinfo")
182			info = ParseInfo(samplesinfo,path)
183			for job in info:
184			submit(job.name,repDict)
185	peller	1.9
186	nmohr	1.33	else:
187			for sample in samplesList:
188			submit(sample,repDict)
189	bortigno	1.30	elif opts.task == 'sys' or opts.task == 'syseval':
190			path = config.get("Directories","SYSin")
191			samplesinfo = config.get("Directories","samplesinfo")
192			info = ParseInfo(samplesinfo,path)
193			if ( opts.samples == ""):
194			for job in info:
195	nmohr	1.34	if (job.subsample):
196			continue #avoid multiple submissions form subsamples
197			# TO FIX FOR SPLITTED SAMPLE
198	bortigno	1.30	submit(job.name,repDict)
199			else:
200			for sample in samplesList:
201			submit(sample,repDict)
202
203			elif opts.task == 'eval':
204			path = config.get("Directories","MVAin")
205			samplesinfo = config.get("Directories","samplesinfo")
206			info = ParseInfo(samplesinfo,path)
207	bortigno	1.10	if ( opts.samples == ""):
208			for job in info:
209	nmohr	1.34	if (job.subsample):
210			continue #avoid multiple submissions from subsamples
211			if(info.checkSplittedSampleName(job.identifier)): # if multiple entries for one name (splitted samples) use the identifier to submit
212			print '@INFO: Splitted samples: submit through identifier'
213			submit(job.identifier,repDict)
214			else: submit(job.name,repDict)
215	bortigno	1.10	else:
216	bortigno	1.11	for sample in samplesList:
217	bortigno	1.10	submit(sample,repDict)
218	peller	1.20
219	bortigno	1.30
220			elif( opts.task == 'split' ):
221			path = config.get("Directories","SPLITin")
222			samplesinfo = config.get("Directories","samplesinfo")
223	bortigno	1.32	repDict['job_id']=opts.nevents_split
224	bortigno	1.30	info = ParseInfo(samplesinfo,path)
225	bortigno	1.32	if ( opts.samples == "" ):
226	bortigno	1.30	for job in info:
227	bortigno	1.32	if (job.subsample): continue #avoid multiple submissions from subsamples
228	bortigno	1.30	submit(job.name,repDict)
229			else:
230			for sample in samplesList:
231			submit(sample,repDict)
232
233			#BDT optimisation
234			elif opts.task == 'mva_opt':
235			total_number_of_steps=1
236			setting = ''
237			for par in (config.get('Optimisation','parameters').split(',')):
238			scan_par=eval(config.get('Optimisation',par))
239			setting+=par+'='+str(scan_par[0])+':'
240			if len(scan_par) > 1 and scan_par[2] != 0:
241			total_number_of_steps+=scan_par[2]
242			setting=setting[:-1] # eliminate last column at the end of the setting string
243			print setting
244			repDict['additional']=setting
245			repDict['job_id']=config.get('Optimisation','training')
246			submit('OPT_main_set',repDict)
247			main_setting=setting
248
249			#Scanning all the parameters found in the training config in the Optimisation sector
250			for par in (config.get('Optimisation','parameters').split(',')):
251			scan_par=eval(config.get('Optimisation',par))
252			print par
253			if len(scan_par) > 1 and scan_par[2] != 0:
254			for step in range(scan_par[2]):
255			value = (scan_par[0])+((1+step)*(scan_par[1]-scan_par[0])/scan_par[2])
256			print value
257			setting=re.sub(par+'.*?:',par+'='+str(value)+':',main_setting)
258			repDict['additional']=setting
259			# repDict['job_id']=config.get('Optimisation','training')
260			submit('OPT_'+par+str(value),repDict)
261			# submit(config.get('Optimisation','training'),repDict)
262			print setting
263
264
265			os.system('qstat')
266			if (opts.philipp_love_progress_bars):
267			os.system('./qstat.py')