ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists
Revision: 1.30
Committed: Tue Jul 9 19:25:00 2013 UTC (11 years, 9 months ago) by ahart
Branch: MAIN
CVS Tags: V02-03-02, HEAD
Changes since 1.29: +1 -1 lines
Log Message:
In the countEvents function, return a hash from the cutflow histogram name to negative one instead of returning negative one. The return value of this function should now always be a hash.

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Getopt::Long;
5 ahart 1.3 use POSIX;
6    
# Forward declarations of the subroutines defined at the end of the script.
sub processArgs;
sub printHelp;
sub getRunList;
sub countEvents;
sub isEDM;

# Command-line options.  "bundling" lets single-letter options be combined.
my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (
  \%opt,
  "cutflow|c=s",
  "luminosity|l=s",
  "prefix|p=s",
  "weight|w=s",
  "xsection|x=s",
  "help|h"
);

# The output prefix is mandatory; printHelp () prints the usage and exits.
if ($opt{"help"} || !$opt{"prefix"})
  {
    printHelp ();
  }

# Expand the remaining arguments into a flat list of files.  processArgs ()
# also strips trailing slashes from the entries of @ARGV in place.
my $files = processArgs (\@ARGV);

# Per-argument bookkeeping, keyed by the command-line argument.
my (%rootFiles, %nTotalEvents);
my $nTotalEvents = 0;
my (%skimEventCounts, %weights, @weights);

# Job statistics gathered from the Condor logs; $counting is set once at
# least one log has been seen.
my ($nGoodJobs, $nBadJobs, $nIncompleteJobs, $counting) = (0, 0, 0, 0);

# Per-directory bookkeeping, keyed by directory and job number.
my (%exitCodes, %signals, %partial);
my (%crossSections, %dirs, %channels);

# Defaults that the user may override on the command line.
my $integratedLuminosity = $opt{"luminosity"} || 10000;
my $cutFlow = $opt{"cutflow"} || "cutFlow";
# First pass over every discovered file: classify the Condor job logs and
# pick up the per-directory cross sections.
#
#   * condor_<job>.log containing "return value" -> counted as a good job,
#     exit code stored in %exitCodes (a nonzero code only triggers a warning)
#   * condor_<job>.log containing "signal"       -> counted as a bad job,
#     signal stored in %signals
#   * any other condor_<job>.log                 -> counted as incomplete,
#     flagged in %partial (this includes a log that cannot be opened)
#   * crossSectionInPicobarn.txt                 -> first line stored in
#     %crossSections for the containing directory
foreach my $file (@$files)
  {
    next if $file eq "." || $file eq "..";

    # Everything up to the last slash; a path without a slash is kept as is.
    my $dir = $file;
    $dir =~ s/^(.*)\/[^\/]*$/$1/;

    if ($file =~ m/^.*\/condor_[^_]*\.log$/)
      {
        my $jobNumber = $file;
        $jobNumber =~ s/^.*\/condor_([^_]*)\.log$/$1/;

        # Read the whole log through a lexical handle with an explicit
        # mode, and report a failure instead of silently reading nothing.
        my $fileContents = "";
        if (open (my $logHandle, "<", $file))
          {
            $fileContents = join ("", <$logHandle>);
            close ($logHandle);
          }
        else
          {
            print "WARNING: Could not read log for job $jobNumber ($file): $!\n";
          }

        # Flatten to a single line so the greedy patterns below span the
        # whole log and keep the last matching occurrence.
        $fileContents =~ s/\n/ /g;
        $counting = 1;

        if ($fileContents =~ m/return value/)
          {
            $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
            $nGoodJobs++;
            print "WARNING: Nonzero exit code for job $jobNumber. (return value $fileContents)\n" if $fileContents != 0;
            $exitCodes{$dir}{$jobNumber} = $fileContents;
          }
        elsif ($fileContents =~ m/signal/)
          {
            $fileContents =~ s/.*\(signal ([^)]*)\).*/$1/g;
            $nBadJobs++;
            print "WARNING: Skipping job $jobNumber. (signal $fileContents)\n";
            $signals{$dir}{$jobNumber} = $fileContents;
          }
        else
          {
            $nIncompleteJobs++;
            $partial{$dir}{$jobNumber} = 1;
          }
      }

    if ($file =~ m/^.*\/crossSectionInPicobarn\.txt$/)
      {
        if (open (my $crossSectionHandle, "<", $file))
          {
            # Only the first line is used; it is stored unmodified.
            my $crossSection = <$crossSectionHandle>;
            close ($crossSectionHandle);
            $crossSections{$dir} = $crossSection;
          }
        else
          {
            print "WARNING: Could not read cross section from $file: $!\n";
          }
      }
  }
# Second pass: decide which ROOT files take part in the merge and collect
# the event counts and weights for the command-line argument they belong to.
foreach my $file (@$files)
  {
    next if $file eq "." || $file eq "..";

    my $dir = $file;
    $dir =~ s/^(.*)\/[^\/]*$/$1/;

    my $isRootFile = ($file =~ m/^.*\.root$/) ? 1 : 0;
    my $fileIsEDM = $isRootFile ? isEDM ($file) : 0;

    # A negative answer from isEDM () means the file is removed from disk.
    if ($fileIsEDM < 0)
      {
        unlink ($file);
        next;
      }

    # Only non-EDM ROOT files are merged.
    next if !$isRootFile || $fileIsEDM;

    # Files named *_<job>.root are dropped when the matching Condor job was
    # killed by a signal or has not finished.
    my $jobNumber;
    if ($file =~ m/^.*_[^_]*\.root$/)
      {
        ($jobNumber = $file) =~ s/^.*_([^_]*)\.root$/$1/;
        my $wasKilled = defined $signals{$dir} && defined $signals{$dir}{$jobNumber};
        my $isUnfinished = defined $partial{$dir} && defined $partial{$dir}{$jobNumber};
        next if $wasKilled || $isUnfinished;
      }

    foreach my $arg (@ARGV)
      {
        # The file belongs to every argument that is a prefix of its path.
        next if substr ($file, 0, length ($arg)) ne $arg;

        $dirs{$arg} = $dir;
        my $counts = countEvents ($file, $cutFlow);
        if ($counts->{$cutFlow} < 0)
          {
            # The job was counted as good from its log, but its output is
            # unusable; stop looking at further arguments for this file.
            $nGoodJobs--;
            $nBadJobs++;
            print "WARNING: Skipping job $jobNumber. (bad ROOT file)\n";
            last;
          }

        if (defined $nTotalEvents{$arg})
          {
            $nTotalEvents{$arg} += $counts->{$cutFlow};
          }
        else
          {
            $nTotalEvents{$arg} = $counts->{$cutFlow};
          }
        $nTotalEvents += $counts->{$cutFlow};
        push (@{$rootFiles{$arg}}, $file);

        # Weight precedence: explicit --weight (a comma-separated list is
        # applied later, so 1.0 is used here), then --xsection, then the
        # cross section found in the directory, then 1.0.
        my $weight;
        if ($opt{"weight"})
          {
            $weight = ($opt{"weight"} =~ m/,/) ? 1.0 : $opt{"weight"};
          }
        elsif ($opt{"xsection"})
          {
            $weight = $opt{"xsection"} * $integratedLuminosity;
          }
        elsif (defined $crossSections{$dir})
          {
            $weight = $crossSections{$dir} * $integratedLuminosity;
          }
        else
          {
            $weight = 1.0;
          }
        push (@weights, $weight);
        $weights{$arg} = $weights[-1];

        # Every other histogram name is treated as "<channel><cutflow name>";
        # accumulate its count under the channel part.
        foreach my $histName (keys %{$counts})
          {
            my $channel = substr ($histName, 0, length ($histName) - length ($cutFlow));
            next if !length ($channel);
            $skimEventCounts{$arg} = {} if !(defined $skimEventCounts{$arg});
            $skimEventCounts{$arg}{$channel} += $counts->{$histName};
          }
      }
  }

# Nothing survived the selection above: report it and stop.
unless (%rootFiles)
  {
    print "Found no ROOT files to merge!\n";
    exit;
  }
# Merge stage.  For every command-line argument: merge its ROOT files into a
# hidden temporary file, normalise the weight by the number of processed
# events, reweight the input trees and record the event counts on disk.
# Finally merge the temporary files into PREFIX.root and delete them.

# Runs an external command without going through the shell, so that file
# names containing spaces or shell metacharacters are passed through intact.
# Returns the raw status from system ().
my $runCommand = sub
  {
    my @command = @_;
    my $status = system (@command);
    print "WARNING: Could not run $command[0]: $!\n" if $status == -1;
    return $status;
  };

my @mergedFiles;
my @mergedWeights;
foreach my $arg (@ARGV)
  {
    # An argument may have contributed no usable ROOT file (for example when
    # all of its jobs failed).  Dereferencing the missing entry would be
    # fatal under "use strict", so such arguments are skipped.
    if (!defined $rootFiles{$arg} || !@{$rootFiles{$arg}})
      {
        print "WARNING: No ROOT files to merge for $arg.\n";
        next;
      }
    my @inputFiles = @{$rootFiles{$arg}};

    # Temporary file in the current directory, named after the argument
    # with every slash replaced by an underscore.
    my $tmpName = "." . $arg . "_" . "$opt{'prefix'}.root";
    $tmpName =~ s/\//_/g;
    $runCommand->("mergeTFileServiceHistograms", "-i", @inputFiles, "-o", $tmpName);
    my $count = $nTotalEvents{$arg};
    $runCommand->("cutFlowLimits", $tmpName);

    # Weights derived from a cross section are normalised per processed
    # event; with zero events the weight becomes zero rather than dividing
    # by zero.  An explicit --weight is left untouched.
    my $fromCrossSection = !$opt{"weight"} && ($opt{"xsection"} || defined $crossSections{$dirs{$arg}});
    if ($fromCrossSection)
      {
        if ($count)
          {
            $weights{$arg} /= $count;
          }
        else
          {
            $weights{$arg} *= $count;
          }
      }
    push (@mergedFiles, $tmpName);
    push (@mergedWeights, $weights{$arg});

    foreach my $rootFile (@inputFiles)
      {
        $runCommand->("weightTrees", $rootFile, $weights{$arg});
      }

    # Record the total number of processed events next to the input files.
    my $eventCountFile = "$dirs{$arg}/numberOfEvents.txt";
    if (open (my $eventCountHandle, ">", $eventCountFile))
      {
        print {$eventCountHandle} sprintf ("%.16g", $count) . "\n";
        close ($eventCountHandle);
      }
    else
      {
        print "WARNING: Could not write $eventCountFile: $!\n";
      }

    # Same for every channel, inside the channel's own subdirectory.
    foreach my $channel (keys %{$skimEventCounts{$arg}})
      {
        my $skimCountFile = "$dirs{$arg}/$channel/skimNumberOfEvents.txt";
        if (open (my $skimCountHandle, ">", $skimCountFile))
          {
            print {$skimCountHandle} sprintf ("%.16g", $skimEventCounts{$arg}{$channel}) . "\n";
            close ($skimCountHandle);
          }
        else
          {
            print "WARNING: Could not write $skimCountFile: $!\n";
          }
      }
  }

# A comma-separated --weight list replaces the per-argument weights as is.
my $mergedWeights = join (",", @mergedWeights);
$mergedWeights = $opt{"weight"} if $opt{"weight"} && $opt{"weight"} =~ m/,/;
$runCommand->("mergeTFileServiceHistograms", "-i", @mergedFiles, "-o", "$opt{'prefix'}.root", "-w", $mergedWeights);

# The per-argument temporary files are no longer needed.
foreach my $mergedFile (@mergedFiles)
  {
    unlink ($mergedFile);
  }
# Summary report.  The three per-sample tables are printed only when at
# least one sample has a weight different from 1; the job statistics are
# printed only when Condor logs were found ($counting).
print "=============================================\n";

# Samples are listed in sorted order so the report is reproducible from run
# to run.  The short name is the part of the argument after the last slash.
my @samples = sort keys %weights;
my %shortName;
foreach my $arg (@samples)
  {
    ($shortName{$arg} = $arg) =~ s/^.*\/([^\/]*)$/$1/;
  }
my $printOutput = (grep { $weights{$_} != 1 } @samples) ? 1 : 0;

# Sample names are passed to sprintf as arguments, never as part of the
# format, so a "%" in a path cannot corrupt the output.
my $output = "Cross-section of samples:\n";
foreach my $arg (@samples)
  {
    $output .= sprintf (" %s: %.5g pb\n", $shortName{$arg}, $crossSections{$dirs{$arg}});
  }
print $output if $printOutput;

$output = "Effective luminosities of samples:\n";
foreach my $arg (@samples)
  {
    # Weights of exactly 1 or 0 are omitted (0 would divide by zero).
    next if $weights{$arg} == 1 || $weights{$arg} == 0;
    $output .= sprintf (" %s: %.5g/fb\n", $shortName{$arg}, $integratedLuminosity / (1000.0 * $weights{$arg}));
  }
print $output if $printOutput;

$output = sprintf ("Weights for target luminosity of %g/fb:\n", $integratedLuminosity / 1000.0);
foreach my $arg (@samples)
  {
    next if $weights{$arg} == 1 || $weights{$arg} == 0;
    $output .= sprintf (" %s: %.5g\n", $shortName{$arg}, $weights{$arg});
  }
print $output if $printOutput;

# Weighted number of events in the final merged file.
my $goodEvents = countEvents ("$opt{'prefix'}.root", $cutFlow);
if ($counting)
  {
    printf ("%s jobs ran successfully over %.1f (%.1f weighted) events.\n", $nGoodJobs, $nTotalEvents, $goodEvents->{$cutFlow});
    print "$nBadJobs jobs failed to run.\n";
    print "$nIncompleteJobs jobs have not finished.\n";
  }
print "=============================================\n";
235 ahart 1.1
# Recursively expands a list of paths into a flat list of files.
#
# Argument: a reference to an array of paths.  Trailing slashes are removed
#   from the elements of that array in place; the main script relies on this
#   for @ARGV.
# Returns: a reference to an array holding every non-directory path found,
#   with directories replaced by their (recursively expanded) contents.
# Side effects: prints a message and exits if a path does not exist; prints
#   a warning and skips a directory that cannot be read.
sub
processArgs
{
  my $argv = shift;

  my @files;
  foreach my $arg (@$argv)
    {
      $arg =~ s/\/*$//;
      if (!(-e $arg))
        {
          print "$arg does not exist!\n";
          exit;
        }

      # Skip the "." and ".." entries produced by readdir below.
      next if ($arg =~ m/\/\.$/ || $arg =~ m/\/\.\.$/);

      if (!(-d $arg))
        {
          push (@files, $arg);
          next;
        }

      # A lexical handle keeps the recursion from touching any global
      # directory handle, and a failed opendir is reported instead of being
      # treated as an empty directory.
      my $dirHandle;
      if (!opendir ($dirHandle, $arg))
        {
          print "WARNING: Could not read directory $arg: $!\n";
          next;
        }
      my @dirContents = map { "$arg/$_" } readdir ($dirHandle);
      closedir ($dirHandle);

      my $newFiles = processArgs (\@dirContents);
      push (@files, @$newFiles);
    }

  return \@files;
}
271    
# Prints the usage message to standard output and exits.
# The program name shown is $0 with any leading directory removed.
sub
printHelp
{
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  # Free-text part of the message.
  my @intro = (
    "Usage: $exeName [OPTION]... -p PREFIX DIRECTORIES_AND_FILES\n",
    "Merges ROOT files containing histograms. If there are Condor logs in the\n",
    "specified directories, checks for nonzero return values. If the directories\n",
    "where created by \"osusub\", uses the cross section from the database to weight\n",
    "all histograms.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  print join ("", @intro);

  # Option table: left column (padded to 29 characters) and description.
  my @optionRows = (
    [" -c, --cutflow HISTOGRAM", "name of histogram to use for the cutflow (default:"],
    [" ", "cutFlow)"],
    [" -h, --help", "print this help message"],
    [" -l, --luminosity", "integrated luminosity to which the histograms are"],
    [" ", "weighted (default: 10000/pb)"],
    [" -p, --prefix PREFIX", "output is named PREFIX.root"],
    [" -w, --weight WEIGHT", "scale the output by WEIGHT, overriding the"],
    [" ", "automatic weighting using the cross section from"],
    [" ", "the database; WEIGHT may be a single number or a"],
    [" ", "comma-separated list, one for each input file"],
    [" -x, --xsection XSECTION", "use XSECTION to weight the histograms instead of"],
    [" ", "the value in the database"],
  );
  foreach my $row (@optionRows)
    {
      printf "%-29s%s\n", $row->[0], $row->[1];
    }

  exit;
}
300 ahart 1.3
# Extracts the ROOT file names listed in a text file.
#
# Argument: path of a text file.
# Returns: a reference to an array with one entry per line that contains
#   "file:" followed later by ".root".  Each entry is the text between
#   "file:" and the last ".root" on the line, with ".root" appended; whatever
#   the pattern does not consume (normally the line terminator) is kept.
#   An unreadable file yields an empty list and a warning.
sub
getRunList
{
  my $runListFile = shift;

  my @runList;

  # Lexical handle with an explicit read mode; a failure is reported instead
  # of silently producing an empty list.
  my $runListHandle;
  if (!open ($runListHandle, "<", $runListFile))
    {
      print "WARNING: Could not open $runListFile: $!\n";
      return \@runList;
    }

  while (my $line = <$runListHandle>)
    {
      next if !($line =~ m/^.*file:.*\.root.*/);
      $line =~ s/.*file:(.*)\.root.*/$1.root/;
      push (@runList, $line);
    }
  close ($runListHandle);

  return \@runList;
}
319    
# Reads event counts for a ROOT file by running the external
# "getEventsFromCutFlow" command and parsing its output.
#
# Arguments: the ROOT file name and the name of the cutflow histogram.
# Returns: always a hash reference.
#   * If the first output line reports a missing histogram, an empty
#     histogram or a file that cannot be opened, that line is printed and
#     { <cutflow name> => -1 } is returned.
#   * Otherwise every output line of the form "<name>: <count> / <weight>"
#     contributes one key.  Each count is scaled by
#     (count of the cutflow histogram) / (its own weight); a zero weight
#     gives a zero count.  Lines that do not have this form are ignored.
sub
countEvents
{
  my $file = shift;
  my $cutFlow = shift;

  # List-form pipe open: the arguments are handed to the command directly,
  # so a file name with spaces or shell metacharacters is not reinterpreted.
  my @output;
  if (open (my $pipe, "-|", "getEventsFromCutFlow", $file, $cutFlow))
    {
      @output = <$pipe>;
      close ($pipe);
    }
  else
    {
      print "WARNING: Could not run getEventsFromCutFlow: $!\n";
    }

  if (@output && ($output[0] =~ m/Did not find a histogram named/ || $output[0] =~ m/appears to be empty/ || $output[0] =~ m/Failed to open/))
    {
      print $output[0];
      return {$cutFlow => -1};
    }

  my %weight;
  my %count;
  foreach my $line (@output)
    {
      next if !($line =~ m/^(.*): (.*) \/ (.*)$/);
      my ($cutFlowName, $rawCount, $rawWeight) = ($1, $2, $3);
      $count{$cutFlowName} = $rawCount + 0;
      $weight{$cutFlowName} = $rawWeight + 0;
    }

  # Take the reference count before the loop: the loop also rescales the
  # cutflow histogram's own entry, and reading it afterwards would make the
  # other entries depend on the (random) hash iteration order.
  my $referenceCount = $count{$cutFlow};
  foreach my $cutFlowName (keys %count)
    {
      if ($weight{$cutFlowName})
        {
          $count{$cutFlowName} *= $referenceCount / $weight{$cutFlowName};
        }
      else
        {
          $count{$cutFlowName} *= $referenceCount * $weight{$cutFlowName};
        }
    }

  return \%count;
}
353 ahart 1.27
# Asks the external "edmFileUtil" command whether a ROOT file is an EDM file.
#
# Argument: the file name.
# Returns:
#   *  0 if the output says the file "appears to be missing" or is
#        "not a ROOT file", or if the output contains no event count at all
#        (for example because the command could not be run);
#   *  N (> 0) if the output contains ", N events,";
#   * -1 if the output reports exactly 0 events.
# The main script deletes files for which -1 is returned, so -1 is only
# produced when a count of zero was actually reported.
sub
isEDM
{
  my $file = shift;

  my $output = `edmFileUtil $file 2>&1`;
  return 0 if !defined $output;
  return 0 if $output =~ m/appears to be missing/ || $output =~ m/not a ROOT file/;

  # Join the output into one line so the greedy match below picks the last
  # ", N events," it contains.
  $output =~ s/[\f\n\r]//g;
  return 0 if !($output =~ m/^.*, (\d+) events,/);

  my $nEvents = $1 + 0;
  return -1 if $nEvents == 0;
  return $nEvents;
}