Friis/TancNote/build_nn_observables.py

#!/usr/bin/env python
import os
import glob
import re
from string import Template

# LaTeX for the visible decay products of each supported hadronic tau decay
# mode, keyed by the name of the neural network trained for that mode.
LATEX_NAME_MAPPING_NO_TAU = {
    'OneProngNoPiZero': r'$\pi^{-}\nu_\tau$',
    'OneProngOnePiZero': r'$\pi^{-}\pi^0\nu_\tau$',
    'OneProngTwoPiZero': r'$\pi^{-}\pi^0\pi^0\nu_\tau$',
    'ThreeProngNoPiZero': r'$\pi^{-}\pi^{+}\pi^{-}\nu_\tau$',
    'ThreeProngOnePiZero': r'$\pi^{-}\pi^{+}\pi^{-}\pi^0\nu_\tau$',
}

# LaTeX for the full decay (parent tau, arrow, decay products).  Derived from
# the table above so the two mappings cannot drift apart: the opening '$' of
# each final-state string is replaced by '$\tau^{-} \rightarrow '.
LATEX_NAME_MAPPING = dict(
    (mode, r'$\tau^{-} \rightarrow ' + products[1:])
    for mode, products in LATEX_NAME_MAPPING_NO_TAU.items()
)

# LaTeX fragment that places a single plot and its sub-figure tag inside a
# ``picture`` environment.
#   $x, $y              -- coordinates passed to \put for the graphic
#   $file_location      -- file handed to \includegraphics*
#   $letterx, $lettery  -- coordinates passed to \put for the "($letter)" tag
#   $letter             -- sub-figure letter printed in parentheses
PLOT_TEMPLATE = Template(
r'''\put($x, $y) {\mbox{\includegraphics*[height=60mm]{${file_location}}}}
    \put(${letterx}, ${lettery}){\small ($letter)}
''')


# LaTeX ``figure`` float wrapping a ``picture`` environment (unit length 1mm)
# that holds a group of plots.
#   $width, $height    -- dimensions of the picture environment
#   $figures           -- pre-rendered body (concatenated PLOT_TEMPLATE output)
#   $decayModeSection  -- section label referenced from the caption as sec:...
#   $decaymode         -- LaTeX name of the decay mode used in the caption
#   $label             -- figure label, emitted as fig:...
FIGURE_TEMPLATE = Template(
r'''
\begin{figure}[b]
\setlength{\unitlength}{1mm}
\begin{center}

\begin{picture}($width, $height)(0,0)
${figures}
\end{picture}

\caption{ 
    Training sample distributions of signal (red) and background (blue) for different observables used
    (see section~\ref{sec:$decayModeSection}) in the neural network corresponding to the $decaymode decay mode.
}

\label{fig:${label}}
\end{center}
\end{figure}
''')

# ``itemize`` list wrapping the per-variable entries of one network.
#   $items -- concatenated VARIABLE_ITEM_TEMPLATE output
VARIABLE_TEMPLATE = Template(
r'''
\begin{itemize}
    $items
\end{itemize}
''')

# One ``\item`` of the per-network variable list.
#   $item        -- text of the entry (the variable name)
#   $description -- text placed after "Figure~" (a \ref plus sub-figure letter)
VARIABLE_ITEM_TEMPLATE = Template(
r'''
    \item $item (Figure~$description)
''')

#VARIABLE_TEMPLATE = Template(
#r'''
#\begin{table}[h]
#   \centering
#   \begin{tabular}{l|r}
#   Input observable & Figure index \\
#   \hline
#    $items
#   \end{tabular}
#\end{table}
#''')
#
#VARIABLE_ITEM_TEMPLATE = Template(
#r'''
#$item & $description \\
#''')

# ``description`` list wrapping the textual description of every observable.
#   $items -- concatenated DESCRIPTION_ITEM_TEMPLATE output
DESCRIPTION_TEMPLATE = Template(
r'''
\begin{description}
    $items
\end{description}
    
''')

# One entry of the observable description list: the name goes into the
# \item[...] heading and the text follows on its own line.
#   $item        -- heading of the entry (the variable name)
#   $description -- body text of the entry
DESCRIPTION_ITEM_TEMPLATE = Template(
r'''
  \item[$item] \hfill \\
  $description
''')

# Human-readable (LaTeX) description of every neural-network input observable.
# Keys are the variable names taken from the plot file names with any trailing
# digits replaced by "N" (see the re.sub call in the main script), so a single
# entry such as 'TrackPtN' covers TrackPt1, TrackPt2, ...  The main script
# looks every plotted variable up here, so a variable without an entry raises
# KeyError.
VAR_DESCRIPTIONS = {
    'ChargedOutlierAngleN': 
    r'''$\Delta R$ between the Nth charged object (ordered by $P_T$) in the isolation region
    and the tau--candidate momentum axis. If the number of
    isolation region objects is less than N, the input is set at one.''',

    'ChargedOutlierPtN': r'''Transverse momentum of the Nth charged object in the isolation region.  If the number of
    isolation region objects is less than N, the input is set at zero.''',

    'DalitzN': r''' Invariant mass of four vector sum of the ``main track'' and the Nth signal
    region object ''',

    'Eta': r'''Pseudo-rapidity of the signal region objects ''',

    'InvariantMassOfSignal': r'''Invariant mass of the composite object formed by the signal region constituents''',
    
    'MainTrackAngle': r'''$\Delta R$ between the ``main track'' and the composite four--vector formed by the 
    signal region constituents''',
    
    'MainTrackPt': r'''Transverse momentum of the ``main track'' ''',

    'OutlierNCharged': r'''Number of charged objects in the isolation region''',

    'OutlierSumPt': r'''Sum of the transverse momentum of objects in the isolation region''',

    'PiZeroAngleN': r'''$\Delta R$ between the Nth $\pi^0$ object in the signal region (ordered by $P_T$) and
    the tau--candidate momentum axis''',

    'PiZeroPtN': r'''Transverse momentum of the Nth $\pi^0$ object in the signal region.''',

    'TrackAngleN': r'''$\Delta R$ between the Nth charged object in the signal region (ordered by $P_T$) and
    the tau--candidate momentum axis, exclusive of the main track.''',

    'TrackPtN': r'''Transverse momentum of the Nth charged object in the signal region, exclusive of the 
    main track''',
}

def groups_of(group_size, iterable):
    """Split ``iterable`` into consecutive lists of ``group_size`` items.

    This is a generator.  Each yielded list holds exactly ``group_size``
    items, except possibly the last one, which holds whatever is left over.
    Nothing is yielded for an empty ``iterable``.
    """
    batch = []
    for element in iterable:
        batch.append(element)
        if len(batch) == group_size:
            yield batch
            # Start a fresh list so the one already handed out is not reused.
            batch = []
    # Hand out the incomplete final group, if there is one.
    if batch:
        yield batch

def put_eta_first(x, y):
    """Old-style comparison function that sorts 'Eta' ahead of everything else.

    Returns a negative number when ``x`` should come before ``y``, a positive
    number when it should come after, and zero when the two are equal.  Any
    pair not involving 'Eta' is ordered by the normal ``<`` / ``>``
    comparison.

    The previous implementation returned +1 when ``x`` was 'Eta', which made
    'Eta' sort *last* despite the function's name, and it reported
    ('Eta', 'Eta') as unequal.  It also relied on the ``cmp`` builtin, which
    does not exist in Python 3.

    Use as ``some_list.sort(put_eta_first)`` on Python 2 or with
    ``key=functools.cmp_to_key(put_eta_first)`` on Python 2.7 / 3.
    """
    if x == y:
        return 0
    if x == 'Eta':
        return -1
    if y == 'Eta':
        return 1
    # Portable replacement for cmp(x, y); equality was handled above.
    return (x > y) - (x < y)

# Opening lines of the summary table written to nn_var_table.tex.
_TABLE_PREAMBLE = r'''
\begin{table}[h]
   \centering
'''

# Caption and label of the summary table written to nn_var_table.tex.
_TABLE_CAPTION = r'''
\caption{
Input observables used for each of the neural networks implemented by the Tau Neural Classifier.
The columns represent the neural networks associated to various decay modes and the rows represent
the superset of input observables (see section~\ref{sec:tanc_nn_discriminants}) used in the neural networks.
A dot in a given row and column indicates that the observable in that row is used in the neural network corresponding
to that column.
}
\label{tab:nn_var_table}
'''


def _collect_figures(pattern='figures/NeuralNetObservables/*.pdf'):
    """Index the plot files matching ``pattern`` by network and variable.

    File names are expected to look like ``<network>_<variable>.pdf``.  Files
    that do not follow this scheme are reported and skipped (previously they
    crashed the script); plots whose variable is ``correlation`` are ignored.

    Returns a tuple ``(info_dict, variable_set, variable_set_raw)``:

    * ``info_dict[network][variable]`` is the path of the plot file,
    * ``variable_set`` is the sorted list of distinct variable names with any
      trailing digits replaced by ``N``,
    * ``variable_set_raw`` is the sorted list of distinct variable names
      exactly as they appear in the file names.
    """
    file_name_matcher = re.compile(
        r'(?P<network>[^_]*)_(?P<variable>\w*)\.pdf')

    info_dict = {}
    generic_names = set()
    raw_names = set()

    for figure_file in glob.glob(pattern):
        parse = file_name_matcher.match(os.path.basename(figure_file))
        if parse is None:
            print("Skipping %s: name is not <network>_<variable>.pdf"
                  % figure_file)
            continue
        network = parse.group('network')
        variable = parse.group('variable')

        # skip correlation variable
        if variable == 'correlation':
            continue

        # Get the dict for this network, otherwise create a new one
        info_dict.setdefault(network, {})[variable] = figure_file

        raw_names.add(variable)
        # Remove trailing indices from variables
        generic_names.add(re.sub("[0-9]+$", "N", variable))

    return info_dict, sorted(generic_names), sorted(raw_names)


def _write_variable_descriptions(variable_set):
    """Write var_descriptions.tex with one description entry per variable.

    Raises KeyError if a variable has no entry in VAR_DESCRIPTIONS.  The text
    is assembled before the file is opened, so a missing description no
    longer leaves a truncated file behind.
    """
    description_items = "".join([
        DESCRIPTION_ITEM_TEMPLATE.substitute(
            item=variable, description=VAR_DESCRIPTIONS[variable])
        for variable in variable_set])

    path = os.path.join(
        'note/observable_distributions/', 'var_descriptions.tex')
    with open(path, 'w') as description_output_file:
        description_output_file.write(
            DESCRIPTION_TEMPLATE.substitute(items=description_items))


def _build_network_section(network, network_info):
    """Return the LaTeX text for one network.

    ``network_info`` maps variable names to plot file paths.  The result is
    the itemised variable index followed by the section label and one figure
    float per group of up to six plots.  The variables are ordered with the
    ``put_eta_first`` comparison function.

    Raises KeyError if ``network`` has no entry in LATEX_NAME_MAPPING.
    """
    # Local import: cmp_to_key lets the comparison function be used for
    # sorting on both Python 2.7 and Python 3.
    import functools

    section_label = '%s_input_descriptions' % network
    output = r"\label{sec:%s}" % section_label + "\n"

    # Sort nicely
    variables = sorted(network_info, key=functools.cmp_to_key(put_eta_first))

    letters = "abcdefghijklmnopqrstuv"
    column_x = (0.5, 65)  # x position of the left and right plot of a row
    row_pitch = 65        # vertical distance between two rows of plots

    variable_table_entries = ""

    # At most six plots per figure: three rows of two.
    for figure_index, plot_group in enumerate(groups_of(6, variables)):
        figure_label = "%s_%i" % (network, figure_index)
        plots = ""
        # Walk the plots one at a time rather than in pairs, so a group with
        # an odd number of plots simply leaves the last right-hand slot empty
        # instead of raising IndexError.
        for position, variable in enumerate(plot_group):
            row, column = divmod(position, 2)
            x = column_x[column]
            y = row_pitch * row
            letter = letters[position]

            plots += PLOT_TEMPLATE.substitute(
                x=x, y=y,
                letterx=x + 10, lettery=y + 60, letter=letter,
                file_location=network_info[variable])

            # Update variable table
            variable_table_entries += VARIABLE_ITEM_TEMPLATE.substitute(
                item=variable,
                description=r"\ref{fig:%s}%s" % (figure_label, letter))

        n_rows = (len(plot_group) + 1) // 2
        output += FIGURE_TEMPLATE.substitute(
            width=130, height=row_pitch * n_rows,
            figures=plots,
            decaymode=LATEX_NAME_MAPPING[network],
            decayModeSection=section_label, label=figure_label)

    return VARIABLE_TEMPLATE.substitute(items=variable_table_entries) + output


def _write_variable_table(info_dict, variable_set_raw):
    """Write nn_var_table.tex: which network uses which input observable.

    One row per entry of ``variable_set_raw`` and one column per known
    network; a bullet marks the observables for which ``info_dict`` holds a
    plot of that network.  A network without any plots gets an empty column
    instead of raising KeyError.
    """
    networks = [
        'OneProngNoPiZero',
        'OneProngOnePiZero',
        'OneProngTwoPiZero',
        'ThreeProngNoPiZero',
        'ThreeProngOnePiZero',
    ]

    with open("note/observable_distributions/nn_var_table.tex",
              "w") as table_file:
        table_file.write(_TABLE_PREAMBLE)

        # Write column definition
        table_file.write(
            r"\begin{tabular}{l|"
            + "|".join(["c"] * len(networks))
            + r"|}")
        table_file.write("\n")

        # Write header
        table_file.write(
            r"\multirow{2}{*}{Input observable} & \multicolumn{%i}{c}{Neural network} \\"
            % len(networks))
        table_file.write("\n")

        table_file.write(
            " & "
            + " & ".join(
                [LATEX_NAME_MAPPING_NO_TAU[network] for network in networks])
            + r"\\")
        table_file.write("\n")

        table_file.write(r"\hline")
        table_file.write("\n")

        # build each row
        for variable in variable_set_raw:
            cells = [
                r"$\bullet$" if variable in info_dict.get(network, ()) else ""
                for network in networks]
            table_file.write(variable + "&" + " & ".join(cells) + r"\\")
            table_file.write("\n")

        table_file.write(r"\end{tabular}")
        table_file.write(_TABLE_CAPTION)
        table_file.write("\n")
        table_file.write(r"\end{table}")


def main():
    """Generate the LaTeX snippets of the note from the available plots.

    Reads the plot files under figures/NeuralNetObservables/ and writes, under
    note/observable_distributions/: var_descriptions.tex, one <network>.tex
    per network found, and nn_var_table.tex.
    """
    info_dict, variable_set, variable_set_raw = _collect_figures()
    print(variable_set)

    # Build description of variables
    _write_variable_descriptions(variable_set)

    # Loop over neural nets and build each section
    for network, network_info in info_dict.items():
        print("")
        print("Building %s" % network)
        section = _build_network_section(network, network_info)
        path = os.path.join(
            'note/observable_distributions/', network + '.tex')
        with open(path, 'w') as output_file:
            output_file.write(section)

    # Build table
    _write_variable_table(info_dict, variable_set_raw)


if __name__ == "__main__":
    main()


Revision:	1.2
Committed:	Sat Apr 24 00:27:15 2010 UTC (15 years ago) by friis
Content type:	text/x-python
Branch:	MAIN
Changes since 1.1:	+102 -5 lines
Log Message:	Some, but not all, of Christian's suggestions implemented
#	User	Rev	Content
1	friis	1.1	#!/usr/bin/env python
2			import os
3			import glob
4			import re
5			from string import Template
6
7			LATEX_NAME_MAPPING = {
8			'OneProngNoPiZero':
9	friis	1.2	r'$\tau^{-} \rightarrow \pi^{-}\nu_\tau$',
10	friis	1.1	'OneProngOnePiZero':
11	friis	1.2	r'$\tau^{-} \rightarrow \pi^{-}\pi^0\nu_\tau$',
12	friis	1.1	'OneProngTwoPiZero':
13	friis	1.2	r'$\tau^{-} \rightarrow \pi^{-}\pi^0\pi^0\nu_\tau$',
14	friis	1.1	'ThreeProngNoPiZero':
15	friis	1.2	r'$\tau^{-} \rightarrow \pi^{-}\pi^{+}\pi^{-}\nu_\tau$',
16	friis	1.1	'ThreeProngOnePiZero':
17	friis	1.2	r'$\tau^{-} \rightarrow \pi^{-}\pi^{+}\pi^{-}\pi^0\nu_\tau$',
18			}
19
20			LATEX_NAME_MAPPING_NO_TAU = {
21			'OneProngNoPiZero':
22			r'$\pi^{-}\nu_\tau$',
23			'OneProngOnePiZero':
24			r'$\pi^{-}\pi^0\nu_\tau$',
25			'OneProngTwoPiZero':
26			r'$\pi^{-}\pi^0\pi^0\nu_\tau$',
27			'ThreeProngNoPiZero':
28			r'$\pi^{-}\pi^{+}\pi^{-}\nu_\tau$',
29			'ThreeProngOnePiZero':
30			r'$\pi^{-}\pi^{+}\pi^{-}\pi^0\nu_\tau$',
31	friis	1.1	}
32
33			PLOT_TEMPLATE = Template(
34			r'''\put($x, $y) {\mbox{\includegraphics*[height=60mm]{${file_location}}}}
35			\put(${letterx}, ${lettery}){\small ($letter)}
36			''')
37
38
39			FIGURE_TEMPLATE = Template(
40			r'''
41			\begin{figure}[b]
42			\setlength{\unitlength}{1mm}
43			\begin{center}
44
45			\begin{picture}($width, $height)(0,0)
46			${figures}
47			\end{picture}
48
49			\caption{
50			Training sample distributions of signal (red) and background (blue) for different observables used
51			(see section~\ref{sec:$decayModeSection}) in the neural network corresponding to the $decaymode decay mode.
52			}
53
54			\label{fig:${label}}
55			\end{center}
56			\end{figure}
57			''')
58
59			VARIABLE_TEMPLATE = Template(
60			r'''
61			\begin{itemize}
62			$items
63			\end{itemize}
64			''')
65
66			VARIABLE_ITEM_TEMPLATE = Template(
67			r'''
68			\item $item (Figure~$description)
69			''')
70
71			#VARIABLE_TEMPLATE = Template(
72			#r'''
73			#\begin{table}[h]
74			# \centering
75			# \begin{tabular}{l\|r}
76			# Input observable & Figure index \\
77			# \hline
78			# $items
79			# \end{tabular}
80			#\end{table}
81			#''')
82			#
83			#VARIABLE_ITEM_TEMPLATE = Template(
84			#r'''
85			#$item & $description \\
86			#''')
87
88			DESCRIPTION_TEMPLATE = Template(
89			r'''
90			\begin{description}
91			$items
92			\end{description}
93
94			''')
95
96			DESCRIPTION_ITEM_TEMPLATE = Template(
97			r'''
98			\item[$item] \hfill \\
99			$description
100			''')
101
102			VAR_DESCRIPTIONS = {
103			'ChargedOutlierAngleN':
104			r'''$\Delta R$ between the Nth charged object (ordered by $P_T$) in the isolation region
105			and the tau--candidate momentum axis. If the number of
106			isolation region objects is less than N, the input is set at one.''',
107
108			'ChargedOutlierPtN': r'''Transverse momentum of the Nth charged object in the isolation region. If the number of
109			isolation region objects is less than N, the input is set at zero.''',
110
111			'DalitzN': r''' Invariant mass of four vector sum of the ``main track'' and the Nth signal
112			region object ''',
113
114			'Eta': r'''Pseudo-rapidity of the signal region objects ''',
115
116			'InvariantMassOfSignal': r'''Invariant mass of the composite object formed by the signal region constituents''',
117
118			'MainTrackAngle': r'''$\Delta R$ between the ``main track'' and the composite four--vector formed by the
119			signal region constituents''',
120
121			'MainTrackPt': r'''Transverse momentum of the ``main track'' ''',
122
123			'OutlierNCharged': r'''Number of charged objects in the isolation region''',
124
125			'OutlierSumPt': r'''Sum of the transverse momentum of objects in the isolation region''',
126
127			'PiZeroAngleN': r'''$\Delta R$ between the Nth $\pi^0$ object in the signal region (ordered by $P_T$) and
128			the tau--candidate momentum axis''',
129
130			'PiZeroPtN': r'''Transverse momentum of the Nth $\pi^0$ object in the signal region.''',
131
132			'TrackAngleN': r'''$\Delta R$ between the Nth charged object in the signal region (ordered by $P_T$) and
133			the tau--candidate momentum axis, exclusive of the main track.''',
134
135			'TrackPtN': r'''Transverse momentum of the Nth charged object in the signal region, exclusive of the
136			main track''',
137			}
138
139			def groups_of(group_size, iterable):
140			count = 0
141			output = []
142			for item in iterable:
143			output.append(item)
144			count += 1
145			if count == group_size:
146			count = 0
147			yield output
148			output = []
149			# Yield partial at end
150			if output:
151			yield output
152
153			def put_eta_first(x, y):
154			if x == 'Eta':
155			return 1
156			if y == 'Eta':
157			return -1
158			return cmp(x,y)
159
160			if __name__=="__main__":
161			# parse file names
162			file_name_matcher = re.compile(r'(?P<network>[^_])_(?P<variable>\w).pdf')
163
164			figures_list = glob.glob('figures/NeuralNetObservables/*.pdf')
165
166			# Keep track of all our plots
167			info_dict = {}
168
169			variable_list = []
170
171	friis	1.2	# Keep track of full variable names
172			variable_list_raw = []
173
174	friis	1.1	for figure_file in figures_list:
175			figure_file_name = os.path.basename(figure_file)
176			parse = file_name_matcher.match(figure_file_name)
177			network = parse.group('network')
178			variable = parse.group('variable')
179
180			# skip correlation variable
181			if variable == 'correlation':
182			continue
183
184			# Get the dict for this network, otherwise create a new one
185			network_dict = info_dict.setdefault(network, {})
186			network_dict[variable] = figure_file
187
188			# Remove trailing indices from variables
189
190	friis	1.2	variable_list_raw.append(variable)
191	friis	1.1	variable_list.append(re.sub("[0-9]+$", "N", variable))
192
193			# Build description of variables
194			variable_set = list(set(variable_list))
195			variable_set.sort()
196			print variable_set
197
198	friis	1.2	variable_set_raw = list(set(variable_list_raw))
199			variable_set_raw.sort()
200
201	friis	1.1	description_output_file = open(os.path.join(
202			'note/observable_distributions/','var_descriptions.tex'), 'w')
203			description_items = ""
204			for variable in variable_set:
205			description_items += DESCRIPTION_ITEM_TEMPLATE.substitute(
206			item = variable, description=VAR_DESCRIPTIONS[variable]
207			)
208
209			description_output_file.write(DESCRIPTION_TEMPLATE.substitute(
210			items = description_items))
211
212			# Loop over neural nets and build each section
213			for network, network_info in info_dict.iteritems():
214			print ""
215			print "Building %s" % network
216			output_file = open(os.path.join(
217			'note/observable_distributions/', network+'.tex'), 'w')
218
219			section_label = '%s_input_descriptions' % network
220			output = "\label{sec:%s}\n" % section_label
221
222			# Count variables
223			variables = network_info.keys()
224			# Sort nicely
225			variables.sort(put_eta_first)
226			n_vars = len(variables)
227
228
229			variable_table_entries=""
230
231			# max group size of 5
232			for figure_index, plot_group in enumerate(groups_of(6, variables)):
233			letter_values = list("abcdefghijklmnopqrstuv")
234			current_height = 0
235			figures_list = ""
236			figure_label = "%s_%i" % (network, figure_index)
237			for plot_row in groups_of(2, plot_group):
238
239			letter = letter_values.pop(0)
240			figures_list += PLOT_TEMPLATE.substitute(
241			x = 0.5, y = current_height,
242			letterx=0.5+10, lettery=current_height+60, letter=letter,
243			file_location = network_info[plot_row[0]])
244
245			# Update variable table
246			variable_table_entries += VARIABLE_ITEM_TEMPLATE.substitute(
247			item=plot_row[0],
248			description=r"\ref{fig:%s}%s"% (figure_label, letter))
249
250			letter = letter_values.pop(0)
251			figures_list += PLOT_TEMPLATE.substitute(
252			x = 65, y = current_height,
253			letterx=65+10, lettery=current_height+60, letter=letter,
254			file_location = network_info[plot_row[1]])
255
256			# Update variable table
257			variable_table_entries += VARIABLE_ITEM_TEMPLATE.substitute(
258			item=plot_row[1],
259			description=r"\ref{fig:%s}%s"% (figure_label, letter))
260
261			# go to next row
262			current_height += 65
263
264			output += FIGURE_TEMPLATE.substitute(
265			width=130, height=current_height,
266			figures = figures_list,
267			decaymode = LATEX_NAME_MAPPING[network],
268			decayModeSection=section_label, label = figure_label)
269
270			variable_index = VARIABLE_TEMPLATE.substitute(
271			items = variable_table_entries)
272
273			output_file.write(variable_index)
274
275			output_file.write(output)
276
277	friis	1.2	# Build table
278			table_file = open("note/observable_distributions/nn_var_table.tex", "w")
279
280			table_file.write(r'''
281			\begin{table}[h]
282			\centering
283			''')
284			networks = [
285			'OneProngNoPiZero',
286			'OneProngOnePiZero',
287			'OneProngTwoPiZero',
288			'ThreeProngNoPiZero',
289			'ThreeProngOnePiZero',
290			]
291
292			# Write column defintion
293			table_file.write(
294			r"\begin{tabular}{l\|"
295			+ "\|".join(
296			["c" for network in networks])
297			+ r"\|}")
298			table_file.write("\n")
299
300			# Write header
301			table_file.write(
302			r"\multirow{2}{*}{Input observable} & \multicolumn{%i}{c}{Neural network} \\"
303			% len(networks))
304			table_file.write("\n")
305
306			table_file.write(
307			" & "
308			+ " & ".join(
309			[r"%s" % LATEX_NAME_MAPPING_NO_TAU[network] for network in networks])
310			+ r"\\")
311			table_file.write("\n")
312
313			table_file.write(r"\hline")
314			table_file.write("\n")
315
316			# build each row
317			for variable in variable_set_raw:
318			row_output = variable + "&"
319			# list that contains a flag if a given network has this variable
320			def has_it(network):
321			if variable in info_dict[network]:
322			return r"$\bullet$"
323			else:
324			return ""
325			row_output += " & ".join([
326			has_it(network) for network in networks])
327			row_output += r"\\"
328			table_file.write(row_output)
329			table_file.write("\n")
330
331			table_file.write(r"\end{tabular}")
332
333			table_file.write(
334			r'''
335			\caption{
336			Input obervables used for each of the neural networks implemented by the Tau Neural Classifier.
337			The columns represents the neural networks associated to various decay modes and the rows represent
338			the superset of input observables(see section~\ref{sec:tanc_nn_discriminants}) used in the neural networks.
339			A dot in a given row and column indicates that the observable in that row is used in the neural network corresponding
340			to that column.
341			}
342			\label{tab:nn_var_table}
343			''')
344
345			table_file.write("\n")
346			table_file.write(r"\end{table}")
347
348
349
350
351
352
353
354	friis	1.1
355
356