ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists
Revision: 1.30
Committed: Tue Jul 9 19:25:00 2013 UTC (11 years, 9 months ago) by ahart
Branch: MAIN
CVS Tags: V02-03-02, HEAD
Changes since 1.29: +1 -1 lines
Log Message:
In the countEvents function, return a hash from the cutflow histogram name to negative one instead of returning negative one. The return value of this function should now always be a hash.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Getopt::Long;
5 use POSIX;
6
7 sub processArgs;
8 sub printHelp;
9 sub getRunList;
10 sub countEvents;
11 sub isEDM;
12
# ---------------------------------------------------------------------------
# Command-line options and script-wide state.
# ---------------------------------------------------------------------------
my %opt;
Getopt::Long::Configure ("bundling");
# GetOptions returns false after complaining about an unknown or malformed
# option.  Show the usage text and stop in that case rather than merging with
# a configuration the user did not ask for.
GetOptions (\%opt, "cutflow|c=s", "luminosity|l=s", "prefix|p=s", "weight|w=s", "xsection|x=s", "help|h") or printHelp ();

# The output prefix is mandatory; printHelp exits after printing.
printHelp () if $opt{"help"} || !$opt{"prefix"};

# Every file found under the remaining arguments.  processArgs also strips
# trailing slashes from the entries of @ARGV in place.
my $files = processArgs (\@ARGV);

my %rootFiles;            # argument => list of ROOT files to merge for it
my %nTotalEvents;         # argument => summed main-cutflow count of its files
my $nTotalEvents = 0;     # the same count summed over all arguments
my %skimEventCounts;      # argument => channel => summed channel count
my %weights;              # argument => weight applied to its merged output
my @weights;              # one weight per accepted ROOT file, in input order
my $nGoodJobs = 0;        # Condor jobs whose log reports a return value
my $nBadJobs = 0;         # jobs killed by a signal or with a bad ROOT file
my $nIncompleteJobs = 0;  # jobs whose log reports neither
my $counting = 0;         # set once any Condor log is seen; enables the job summary
my %exitCodes;            # directory => job number => return value
my %signals;              # directory => job number => signal
my %partial;              # directory => job number => 1 for unfinished jobs
my %crossSections;        # directory => first line of crossSectionInPicobarn.txt
my %dirs;                 # argument => directory of its most recent ROOT file
my %channels;             # not referenced by the code visible in this file

# Target integrated luminosity; the help text gives the default as 10000/pb.
my $integratedLuminosity = 10000;
$integratedLuminosity = $opt{"luminosity"} if $opt{"luminosity"};

# Name of the main cutflow histogram.
my $cutFlow = "cutFlow";
$cutFlow = $opt{"cutflow"} if $opt{"cutflow"};
# ---------------------------------------------------------------------------
# First pass over the input files: classify every Condor job from its log
# file and pick up the cross section stored next to the job output.
# ---------------------------------------------------------------------------
foreach my $file (@$files)
{
  next if $file eq "." || $file eq "..";

  # Directory part of the path (everything before the last slash).
  my $dir = $file;
  $dir =~ s/^(.*)\/[^\/]*$/$1/;

  if ($file =~ m/^.*\/condor_([^_]*)\.log$/)
  {
    my $jobNumber = $1;

    # Read the whole log onto a single line so that the patterns below are
    # not stopped by line breaks.
    my $fileContents = "";
    if (open (my $log, "<", $file))
    {
      $fileContents = join ("", <$log>);
      close ($log);
    }
    else
    {
      # An unreadable log is still counted as an unfinished job below, which
      # keeps the job's ROOT file out of the merge; only the reason is new.
      print "WARNING: Could not open $file: $!\n";
    }
    $fileContents =~ s/\n/ /g;
    $counting = 1;

    if ($fileContents =~ m/return value/)
    {
      # The greedy leading ".*" keeps the last "(return value N)" in the log.
      $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
      $nGoodJobs++;
      print "WARNING: Nonzero exit code for job $jobNumber. (return value $fileContents)\n" if $fileContents != 0;
      $exitCodes{$dir}{$jobNumber} = $fileContents;
    }
    elsif ($fileContents =~ m/signal/)
    {
      $fileContents =~ s/.*\(signal ([^)]*)\).*/$1/g;
      $nBadJobs++;
      print "WARNING: Skipping job $jobNumber. (signal $fileContents)\n";
      $signals{$dir}{$jobNumber} = $fileContents;
    }
    else
    {
      # Neither a return value nor a signal: the job has not finished.
      $nIncompleteJobs++;
      $partial{$dir}{$jobNumber} = 1;
    }
  }

  if ($file =~ m/^.*\/crossSectionInPicobarn\.txt$/)
  {
    if (open (my $crossSectionFile, "<", $file))
    {
      # Only the first line of the file is used.
      my $crossSection = <$crossSectionFile>;
      close ($crossSectionFile);
      chomp ($crossSection) if defined $crossSection;
      $crossSections{$dir} = $crossSection;
    }
    else
    {
      # Without an entry in %crossSections the files of this directory are
      # weighted as if no cross section were available.
      print "WARNING: Could not open $file: $!\n";
    }
  }
}
# ---------------------------------------------------------------------------
# Second pass over the input files: collect the histogram ROOT files of every
# command-line argument, count their events and decide on their weight.
# ---------------------------------------------------------------------------
foreach my $file (@$files)
{
  next if $file eq "." || $file eq "..";

  # Directory part of the path (everything before the last slash).
  my $dir = $file;
  $dir =~ s/^(.*)\/[^\/]*$/$1/;

  my $isRoot = ($file =~ m/^.*\.root$/);
  my $fileIsEDM = $isRoot ? isEDM ($file) : 0;

  # A negative value from isEDM (returned when it reads an event count of
  # zero) causes the file to be deleted.
  if ($fileIsEDM < 0)
  {
    unlink ($file);
    next;
  }

  # Only non-EDM ROOT files are merged.
  next if !$isRoot || $fileIsEDM;

  # The job number is whatever follows the last underscore of the file name.
  # Files of jobs that were killed by a signal or have not finished are
  # skipped.
  my $jobNumber;
  if ($file =~ m/^.*_([^_]*)\.root$/)
  {
    $jobNumber = $1;
    next if defined $signals{$dir} && defined $signals{$dir}{$jobNumber};
    next if defined $partial{$dir} && defined $partial{$dir}{$jobNumber};
  }

  foreach my $arg (@ARGV)
  {
    # The file belongs to an argument when it IS that argument or lives below
    # it.  Requiring the "/" keeps "dir1" from also claiming "dir10/...".
    next if !($file eq $arg || index ($file, "$arg/") == 0);

    $dirs{$arg} = $dir;
    my $count = countEvents ($file, $cutFlow);
    if ($count->{$cutFlow} < 0)
    {
      $nGoodJobs--;
      $nBadJobs++;
      # Files without an underscore have no job number; name the file then.
      my $jobLabel = defined $jobNumber ? $jobNumber : $file;
      print "WARNING: Skipping job $jobLabel. (bad ROOT file)\n";
      last;
    }

    $nTotalEvents{$arg} = 0 if !(defined $nTotalEvents{$arg});
    $nTotalEvents{$arg} += $count->{$cutFlow};
    $nTotalEvents += $count->{$cutFlow};
    push (@{$rootFiles{$arg}}, $file);

    # Weight precedence: an explicit weight, then an explicit cross section,
    # then the cross section read from the file's directory, then 1.  With a
    # comma-separated weight list the per-file weight stays 1; the list
    # itself is passed to the final merge.
    my $weight;
    if ($opt{"weight"})
    {
      $weight = ($opt{"weight"} =~ m/,/) ? 1.0 : $opt{"weight"};
    }
    elsif ($opt{"xsection"})
    {
      $weight = $opt{"xsection"} * $integratedLuminosity;
    }
    elsif (defined $crossSections{$dir})
    {
      $weight = $crossSections{$dir} * $integratedLuminosity;
    }
    else
    {
      $weight = 1.0;
    }
    push (@weights, $weight);
    $weights{$arg} = $weight;

    # Every other key returned by countEvents is treated as
    # "<channel><cutflow name>"; the part in front of the cutflow name is the
    # channel.  The main cutflow itself has an empty channel and is skipped.
    foreach my $channelCutFlow (keys %{$count})
    {
      my $channel = substr ($channelCutFlow, 0, length ($channelCutFlow) - length ($cutFlow));
      next if !length ($channel);
      $skimEventCounts{$arg}{$channel} = 0 if !(defined $skimEventCounts{$arg}{$channel});
      $skimEventCounts{$arg}{$channel} += $count->{$channelCutFlow};
    }
  }
}
# ---------------------------------------------------------------------------
# Merge the files of every argument into a temporary file, record the event
# counts, then merge the temporary files into PREFIX.root.
# ---------------------------------------------------------------------------
if (!%rootFiles)
{
  print "Found no ROOT files to merge!\n";
  exit;
}

my @mergedFiles;
my @mergedWeights;
foreach my $arg (@ARGV)
{
  # An argument that contributed no usable ROOT file has nothing to merge and
  # no known output directory.  Dereferencing its missing file list would
  # also be fatal under "use strict".
  next if !(defined $rootFiles{$arg}) || !@{$rootFiles{$arg}};

  # Hidden temporary file in the current directory; slashes of the argument
  # (and of the prefix) are flattened to underscores.
  my $tmpName = "." . $arg . "_" . "$opt{'prefix'}.root";
  $tmpName =~ s/\//_/g;

  # The list form of system bypasses the shell, so file names containing
  # spaces or shell metacharacters are passed through unchanged.
  system ("mergeTFileServiceHistograms", "-i", @{$rootFiles{$arg}}, "-o", $tmpName) == 0
    or print "WARNING: mergeTFileServiceHistograms failed for $arg.\n";
  my $count = $nTotalEvents{$arg};
  system ("cutFlowLimits", $tmpName);

  # A weight derived from a cross section is divided by the number of events
  # of the sample.  With a zero count the multiplication turns the weight
  # into zero instead of dividing by zero.  An explicit -w weight is left
  # untouched.
  if (!$opt{"weight"} && ($opt{"xsection"} || defined $crossSections{$dirs{$arg}}))
  {
    if ($count)
    {
      $weights{$arg} /= $count;
    }
    else
    {
      $weights{$arg} *= $count;
    }
  }
  push (@mergedFiles, $tmpName);
  push (@mergedWeights, $weights{$arg});

  foreach my $rootFile (@{$rootFiles{$arg}})
  {
    system ("weightTrees", $rootFile, $weights{$arg});
  }

  # Record the number of events next to the most recent input file.
  my $eventCountName = "$dirs{$arg}/numberOfEvents.txt";
  if (open (my $eventCount, ">", $eventCountName))
  {
    print $eventCount sprintf ("%.16g", $count) . "\n";
    close ($eventCount);
  }
  else
  {
    print "WARNING: Could not write $eventCountName: $!\n";
  }

  foreach my $channel (keys %{$skimEventCounts{$arg}})
  {
    my $skimCountName = "$dirs{$arg}/$channel/skimNumberOfEvents.txt";
    if (open (my $skimCount, ">", $skimCountName))
    {
      print $skimCount sprintf ("%.16g", $skimEventCounts{$arg}{$channel}) . "\n";
      close ($skimCount);
    }
    else
    {
      print "WARNING: Could not write $skimCountName: $!\n";
    }
  }
}

# A comma-separated -w list replaces the computed per-argument weights.
my $mergedFiles = join (" ", @mergedFiles);
my $mergedWeights = join (",", @mergedWeights);
$mergedWeights = $opt{"weight"} if $opt{"weight"} && $opt{"weight"} =~ m/,/;
system ("mergeTFileServiceHistograms", "-i", @mergedFiles, "-o", "$opt{'prefix'}.root", "-w", $mergedWeights) == 0
  or print "WARNING: mergeTFileServiceHistograms failed for $opt{'prefix'}.root.\n";

# The temporary per-argument files are no longer needed.
foreach my $mergedFile (@mergedFiles)
{
  unlink ("$mergedFile");
}
# ---------------------------------------------------------------------------
# Summary: cross sections, effective luminosities and weights of the samples,
# followed by the job statistics when Condor logs were found.
# ---------------------------------------------------------------------------
print "=============================================\n";

# Each listing is printed only when at least one sample has a weight other
# than 1.  $output and $printOutput are declared once and reused.
my $output = "Cross-section of samples:\n";
my $printOutput = 0;
foreach my $arg (keys %weights)
{
  my $shortArg = $arg;
  $shortArg =~ s/^.*\/([^\/]*)$/$1/;

  # Show the cross section that takes part in the weighting: the -x value
  # when given, otherwise the one read from the sample's directory.  Samples
  # without a known cross section are left out instead of being listed with
  # a misleading 0 pb.
  my $crossSection = $opt{"xsection"} ? $opt{"xsection"} : $crossSections{$dirs{$arg}};
  next if !(defined $crossSection);

  # The sample name is passed as data so that a "%" in it cannot be taken
  # for a format directive.
  $output .= sprintf (" %s: %.5g pb\n", $shortArg, $crossSection);
  $printOutput = 1 if $weights{$arg} != 1;
}
print $output if $printOutput;

$output = "Effective luminosities of samples:\n";
$printOutput = 0;
foreach my $arg (keys %weights)
{
  my $shortArg = $arg;
  $shortArg =~ s/^.*\/([^\/]*)$/$1/;
  $output .= sprintf (" %s: %.5g/fb\n", $shortArg, $integratedLuminosity / (1000.0 * $weights{$arg})) if $weights{$arg} != 1 && $weights{$arg} != 0;
  $printOutput = 1 if $weights{$arg} != 1;
}
print $output if $printOutput;

$output = sprintf ("Weights for target luminosity of %g/fb:\n", $integratedLuminosity / 1000.0);
$printOutput = 0;
foreach my $arg (keys %weights)
{
  my $shortArg = $arg;
  $shortArg =~ s/^.*\/([^\/]*)$/$1/;
  $output .= sprintf (" %s: %.5g\n", $shortArg, $weights{$arg}) if $weights{$arg} != 1 && $weights{$arg} != 0;
  $printOutput = 1 if $weights{$arg} != 1;
}
print $output if $printOutput;

# The weighted number of events is read back from the merged output file.
my $goodEvents = countEvents ("$opt{'prefix'}.root", $cutFlow);
if ($counting)
{
  printf "%s jobs ran successfully over %.1f (%.1f weighted) events.\n", $nGoodJobs, $nTotalEvents, $goodEvents->{$cutFlow};
  print "$nBadJobs jobs failed to run.\n";
  print "$nIncompleteJobs jobs have not finished.\n";
}
print "=============================================\n";
235
# Expands a list of paths into a flat list of the files below them.
#
# Arguments:
#   $argv - reference to an array of file and directory names.  Trailing
#           slashes are stripped from its elements IN PLACE; the main script
#           relies on this for @ARGV.
#
# Returns a reference to an array holding every non-directory path found,
# with directories expanded recursively.  Entries ending in "/." or "/.." are
# ignored.  Prints a message and exits when a path does not exist.
sub
processArgs
{
  my $argv = shift;

  my @files;
  # The loop variable aliases the elements of the caller's array, which is
  # what makes the trailing-slash strip visible to the caller.
  foreach my $arg (@$argv)
  {
    $arg =~ s/\/*$//;
    if (!(-e $arg))
    {
      print "$arg does not exist!\n";
      exit;
    }
    next if ($arg =~ m/\/\.$/ || $arg =~ m/\/\.\.$/);

    if (!(-d $arg))
    {
      push (@files, $arg);
      next;
    }

    # A lexical handle keeps recursive calls from sharing one global
    # directory handle, and a failed opendir is reported instead of being
    # read as an empty directory.
    my $dirHandle;
    if (!opendir ($dirHandle, $arg))
    {
      print "WARNING: Could not read directory $arg: $!\n";
      next;
    }
    my @dirContents = map { "$arg/$_" } readdir ($dirHandle);
    closedir ($dirHandle);

    my $newFiles = processArgs (\@dirContents);
    push (@files, @$newFiles);
  }

  return \@files;
}
271
# Prints the usage text to standard output and terminates the script.
# The program name shown is the base name of $0.
sub
printHelp
{
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  # Free-form introduction, printed line by line.
  my @introduction = (
    "Usage: $exeName [OPTION]... -p PREFIX DIRECTORIES_AND_FILES\n",
    "Merges ROOT files containing histograms. If there are Condor logs in the\n",
    "specified directories, checks for nonzero return values. If the directories\n",
    "where created by \"osusub\", uses the cross section from the database to weight\n",
    "all histograms.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  foreach my $line (@introduction)
  {
    print $line;
  }

  # Option table: the left column is padded to 29 characters; rows with a
  # blank left column continue the description of the row above.
  my @optionRows = (
    [" -c, --cutflow HISTOGRAM", "name of histogram to use for the cutflow (default:"],
    [" ", "cutFlow)"],
    [" -h, --help", "print this help message"],
    [" -l, --luminosity", "integrated luminosity to which the histograms are"],
    [" ", "weighted (default: 10000/pb)"],
    [" -p, --prefix PREFIX", "output is named PREFIX.root"],
    [" -w, --weight WEIGHT", "scale the output by WEIGHT, overriding the"],
    [" ", "automatic weighting using the cross section from"],
    [" ", "the database; WEIGHT may be a single number or a"],
    [" ", "comma-separated list, one for each input file"],
    [" -x, --xsection XSECTION", "use XSECTION to weight the histograms instead of"],
    [" ", "the value in the database"],
  );
  foreach my $row (@optionRows)
  {
    printf "%-29s%s\n", $row->[0], $row->[1];
  }

  exit;
}
300
# Extracts the ROOT file names listed in a text file.
#
# Arguments:
#   $runListFile - path of the file to scan
#
# Returns a reference to an array with one entry per line that contains
# "file:" followed later by ".root".  Each entry is the text between the last
# "file:" and the last ".root" of the line, with ".root" appended and without
# any trailing newline.  An unreadable file yields an empty list.
sub
getRunList
{
  my $runListFile = shift;

  my @runList;
  my $runListHandle;
  if (!open ($runListHandle, "<", $runListFile))
  {
    print "WARNING: Could not open $runListFile: $!\n";
    return \@runList;
  }
  while (my $line = <$runListHandle>)
  {
    # Both ".*" are greedy, which selects the last "file:" and the last
    # ".root".  Building the entry from the capture (instead of substituting
    # inside the line) keeps the line's newline out of the result.
    next if !($line =~ m/.*file:(.*)\.root/);
    push (@runList, "$1.root");
  }
  close ($runListHandle);

  return \@runList;
}
319
# Reads the event counts of a ROOT file by running the external program
# "getEventsFromCutFlow" on it.
#
# Arguments:
#   $file    - path of the ROOT file
#   $cutFlow - name of the main cutflow histogram
#
# Returns a reference to a hash keyed by the names printed by the program.
# The return value is always a hash reference: when the counts cannot be read
# (the program cannot be run, prints nothing, or reports a missing histogram,
# an empty histogram or an unreadable file) it is { $cutFlow => -1 }.
#
# Every output line of the form "NAME: COUNT / WEIGHT" contributes one entry;
# other lines are ignored.  Each count is then multiplied by the main
# cutflow's count divided by the entry's own WEIGHT; an entry whose WEIGHT is
# zero ends up with a count of zero.
sub
countEvents
{
  my $file = shift;
  my $cutFlow = shift;

  # List-form pipe open: the arguments reach the program unchanged, without a
  # shell in between, and a program that cannot be started makes open fail.
  my @output;
  if (open (my $pipe, "-|", "getEventsFromCutFlow", $file, $cutFlow))
  {
    @output = <$pipe>;
    close ($pipe);
  }
  if (!@output)
  {
    print "WARNING: getEventsFromCutFlow produced no output for $file.\n";
    return {$cutFlow => -1};
  }
  if ($output[0] =~ m/Did not find a histogram named/ || $output[0] =~ m/appears to be empty/ || $output[0] =~ m/Failed to open/)
  {
    print $output[0];
    return {$cutFlow => -1};
  }

  my %weight;
  my %count;
  foreach my $line (@output)
  {
    # Lines that are not "NAME: COUNT / WEIGHT" would otherwise end up as
    # bogus entries keyed by the whole line.
    next if !($line =~ m/(.*): (.*) \/ (.*)/);
    my ($cutFlowName, $count, $weight) = ($1, $2, $3);
    $count{$cutFlowName} = $count + 0;
    $weight{$cutFlowName} = $weight + 0;
  }

  # Take the main cutflow's count before the loop rewrites it, so that the
  # result no longer depends on the order in which the hash is traversed.
  # NOTE(review): this assumes the unscaled main count is the intended
  # factor; the two readings agree whenever the main count equals its weight.
  my $mainCount = defined $count{$cutFlow} ? $count{$cutFlow} : 0;
  foreach my $cutFlowName (keys %count)
  {
    if ($weight{$cutFlowName})
    {
      $count{$cutFlowName} *= $mainCount / $weight{$cutFlowName};
    }
    else
    {
      # The weight is zero here, so this zeroes the count without dividing.
      $count{$cutFlowName} *= $mainCount * $weight{$cutFlowName};
    }
  }

  return \%count;
}
353
# Asks the external program "edmFileUtil" whether a file is an EDM file.
#
# Arguments:
#   $file - path of the file to inspect
#
# Returns
#   0          when the output says a part of the file "appears to be
#              missing" or that it is "not a ROOT file", and also when the
#              output contains no readable "N events" figure (for example
#              because the program could not be run);
#   -1         when the program reports zero events;
#   the number of events otherwise.
#
# The caller deletes files for which a negative value is returned, so -1 is
# only returned for an event count that was actually read as zero.
sub
isEDM
{
  my $file = shift;

  # Single-quote the name for the shell (needed for the 2>&1 redirection) so
  # that spaces or metacharacters in it are not interpreted.
  my $quotedFile = $file;
  $quotedFile =~ s/'/'\\''/g;
  my $output = `edmFileUtil '$quotedFile' 2>&1`;

  return 0 if !(defined $output);
  return 0 if $output =~ m/appears to be missing/ || $output =~ m/not a ROOT file/;

  # Join the output onto one line, then pick the field in front of the last
  # " events," that directly follows a ", ".
  $output =~ s/[\f\n\r]//g;
  return 0 if !($output =~ m/^.*, ([^,]*) events,.*$/);
  my $nEvents = $1;
  return 0 if !($nEvents =~ m/^\s*[0-9]/);

  $nEvents += 0;
  $nEvents-- if $nEvents == 0;
  return $nEvents;
}