ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists
Revision: 1.28
Committed: Wed Jul 3 21:30:14 2013 UTC (11 years, 9 months ago) by ahart
Branch: MAIN
Changes since 1.27: +19 -2 lines
Log Message:
If EDM files are found, calculate the number of events and output to a file called "skimNumberOfEvents.txt".

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Getopt::Long;
5 ahart 1.3 use POSIX;
6    
7     sub processArgs;
8     sub printHelp;
9     sub getRunList;
10     sub countEvents;
11 ahart 1.27 sub isEDM;
12 ahart 1.1
# Parse the command line.  Recognized options end up in %opt; whatever is
# left in @ARGV is treated as the list of input files and directories.
my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "cutflow|c=s", "luminosity|l=s", "prefix|p=s", "weight|w=s", "xsection|x=s", "help|h");

# The output prefix is mandatory; printHelp exits the script.
printHelp () if $opt{"help"} || !$opt{"prefix"};

# Flat list of every file found under the command-line arguments.
my $files = processArgs (\@ARGV);

# Bookkeeping filled in by the two passes over @$files further down.
my (%rootFiles, %skimEventCounts, %weights, @weights);
my ($nGoodJobs, $nBadJobs, $nIncompleteJobs) = (0, 0, 0);
my $counting = 0;    # becomes true once a Condor log has been seen
my (%exitCodes, %signals, %partial, %crossSections, %dirs);

# Defaults apply whenever the corresponding option is absent (or false).
my $integratedLuminosity = $opt{"luminosity"} ? $opt{"luminosity"} : 10000;
my $cutFlow = $opt{"cutflow"} ? $opt{"cutflow"} : "cutFlow";
# First pass over the inputs: classify every job from its Condor log and
# pick up the cross section stored next to each set of jobs.
#   %exitCodes{dir}{job} - return value of jobs that ran to completion
#   %signals{dir}{job}   - signal of jobs that were killed
#   %partial{dir}{job}   - jobs whose log reports neither
#   %crossSections{dir}  - first line of crossSectionInPicobarn.txt
foreach my $file (@$files)
  {
    next if $file eq ".";
    next if $file eq "..";
    my $dir = $file;
    $dir =~ s/^(.*)\/[^\/]*$/$1/;

    if ($file =~ m/^.*\/condor_[^_]*\.log$/)
      {
        my $jobNumber = $file;
        $jobNumber =~ s/^.*\/condor_([^_]*)\.log$/$1/;

        # Slurp the log.  An unreadable log is reported and then treated
        # like an empty one, i.e. the job is counted as unfinished.
        my $fileContents = "";
        if (open (my $log, "<", $file))
          {
            local $/;
            $fileContents = <$log>;
            $fileContents = "" if !defined $fileContents;
            close ($log);
          }
        else
          {
            print "WARNING: Could not read $file: $!\n";
          }
        $fileContents =~ s/\n/ /g;
        $counting = 1;

        if ($fileContents =~ m/return value/)
          {
            # The greedy leading .* keeps the last return value in the log.
            my $exitCode = $fileContents;
            $exitCode =~ s/.*\(return value ([^)]*)\).*/$1/g;
            $nGoodJobs++;
            print "WARNING: Nonzero exit code for job $jobNumber. (return value $exitCode)\n" if $exitCode != 0;
            $exitCodes{$dir}{$jobNumber} = $exitCode;
          }
        elsif ($fileContents =~ m/signal/)
          {
            my $signal = $fileContents;
            $signal =~ s/.*\(signal ([^)]*)\).*/$1/g;
            $nBadJobs++;
            print "WARNING: Skipping job $jobNumber. (signal $signal)\n";
            $signals{$dir}{$jobNumber} = $signal;
          }
        else
          {
            $nIncompleteJobs++;
            $partial{$dir}{$jobNumber} = 1;
          }
      }

    if ($file =~ m/^.*\/crossSectionInPicobarn\.txt$/)
      {
        if (open (my $crossSectionFile, "<", $file))
          {
            my $crossSection = <$crossSectionFile>;
            close ($crossSectionFile);
            if (defined $crossSection)
              {
                # Drop the line ending; a blank first line is ignored so the
                # sample falls back to unit weight instead of weight zero.
                $crossSection =~ s/\s+$//;
                $crossSections{$dir} = $crossSection if $crossSection ne "";
              }
          }
        else
          {
            print "WARNING: Could not read $file: $!\n";
          }
      }
  }
# Second pass over the inputs: sort every ROOT file into either the list of
# histogram files to merge (grouped by the command-line argument it was found
# under) or the per-directory EDM event counts.
foreach my $file (@$files)
  {
    next if $file eq ".";
    next if $file eq "..";
    next if !($file =~ m/^.*\.root$/);
    my $dir = $file;
    $dir =~ s/^(.*)\/[^\/]*$/$1/;

    my $fileIsEDM = isEDM ($file);
    if ($fileIsEDM)
      {
        # isEDM reports an EDM file without events as -1 so that the value
        # is still true; it contributes zero events to the sum.
        $fileIsEDM = 0 if $fileIsEDM < 0;
        $skimEventCounts{$dir} = 0 if !(defined $skimEventCounts{$dir});
        $skimEventCounts{$dir} += $fileIsEDM;
        next;
      }

    # Histogram files named *_JOB.root are skipped when the Condor log of
    # that job reported a signal or no result at all.
    my $jobNumber;
    if ($file =~ m/^.*_[^_]*\.root$/)
      {
        $jobNumber = $file;
        $jobNumber =~ s/^.*_([^_]*)\.root$/$1/;
        next if defined $signals{$dir} && defined $signals{$dir}{$jobNumber};
        next if defined $partial{$dir} && defined $partial{$dir}{$jobNumber};
      }
    my $jobLabel = defined $jobNumber ? "job $jobNumber" : "file $file";

    foreach my $arg (@ARGV)
      {
        # The file belongs to $arg if it is $arg itself or lies below it.
        # Matching on "$base/" rather than on the bare prefix keeps an
        # argument "dir1" from also claiming the files under "dir10".
        my $base = $arg;
        $base =~ s/\/*$//;
        next if !($file eq $arg || $file eq $base || index ($file, "$base/") == 0);

        $dirs{$arg} = $dir;
        if (countEvents ($file, $cutFlow) < 0)
          {
            $nGoodJobs--;
            $nBadJobs++;
            print "WARNING: Skipping $jobLabel. (bad ROOT file)\n";
            last;
          }
        push (@{$rootFiles{$arg}}, $file);

        # Weight precedence: --weight, then --xsection, then the cross
        # section found in the file's directory, then unit weight.  With a
        # comma-separated --weight the per-sample weight stays at 1 here and
        # the list itself is handed to the final merge.
        my $weight;
        if ($opt{"weight"})
          {
            $weight = ($opt{"weight"} =~ m/,/) ? 1.0 : $opt{"weight"};
          }
        elsif ($opt{"xsection"})
          {
            $weight = $opt{"xsection"} * $integratedLuminosity;
          }
        elsif (defined $crossSections{$dir})
          {
            $weight = $crossSections{$dir} * $integratedLuminosity;
          }
        else
          {
            $weight = 1.0;
          }
        push (@weights, $weight);
        $weights{$arg} = $weight;
      }
  }
if (!%rootFiles)
  {
    print "Found no ROOT files to merge!\n";
    exit;
  }

# Merge the files of each command-line argument into a temporary file,
# normalize the argument's weight by the merged event count, and then merge
# the temporary files into PREFIX.root with those weights.  External tools
# are run with the list form of system so that file names never pass through
# a shell.
my %nTotalEvents;
my $nTotalEvents = 0;
my @mergedFiles;
my @mergedWeights;
foreach my $arg (@ARGV)
  {
    # An argument may have contributed no usable histogram file at all.
    if (!defined $rootFiles{$arg} || !@{$rootFiles{$arg}})
      {
        print "WARNING: Found no ROOT files to merge for $arg.\n";
        next;
      }

    my $tmpName = "." . $arg . "_" . "$opt{'prefix'}.root";
    $tmpName =~ s/\//_/g;
    system ("mergeTFileServiceHistograms", "-i", @{$rootFiles{$arg}}, "-o", $tmpName);
    my $count = countEvents ($tmpName, $cutFlow);
    system ("cutFlowLimits", $tmpName);

    # Cross-section weights are divided by the number of processed events;
    # that is impossible if the merged cutflow reports none (or could not be
    # read, in which case countEvents returns -1).
    my $normalize = !$opt{"weight"} && ($opt{"xsection"} || defined $crossSections{$dirs{$arg}});
    if ($normalize && $count <= 0)
      {
        print "WARNING: Skipping $arg. (no events found in merged cut flow)\n";
        delete $weights{$arg};
        unlink ($tmpName);
        next;
      }

    $nTotalEvents{$arg} = $count;
    $nTotalEvents += $count if $count > 0;
    $weights{$arg} /= $count if $normalize;
    push (@mergedFiles, $tmpName);
    push (@mergedWeights, $weights{$arg});

    foreach my $rootFile (@{$rootFiles{$arg}})
      {
        system ("weightTrees", $rootFile, $weights{$arg});
      }

    if (defined $crossSections{$dirs{$arg}})
      {
        my $countFile = "$dirs{$arg}/numberOfEvents.txt";
        if (open (my $eventCount, ">", $countFile))
          {
            printf {$eventCount} "%.0f\n", $count;
            close ($eventCount) or print "WARNING: Could not write $countFile: $!\n";
          }
        else
          {
            print "WARNING: Could not write $countFile: $!\n";
          }
      }
  }

# A comma-separated --weight replaces the computed per-sample weights.
my $mergedWeights = join (",", @mergedWeights);
$mergedWeights = $opt{"weight"} if $opt{"weight"} && $opt{"weight"} =~ m/,/;
if (@mergedFiles)
  {
    system ("mergeTFileServiceHistograms", "-i", @mergedFiles, "-o", "$opt{'prefix'}.root", "-w", $mergedWeights);
  }

# Record the number of events found in the EDM files of each directory.
foreach my $dir (keys %skimEventCounts)
  {
    my $skimFile = "$dir/skimNumberOfEvents.txt";
    if (open (my $edmEvents, ">", $skimFile))
      {
        print {$edmEvents} $skimEventCounts{$dir} . "\n";
        close ($edmEvents) or print "WARNING: Could not write $skimFile: $!\n";
      }
    else
      {
        print "WARNING: Could not write $skimFile: $!\n";
      }
  }

# The per-argument temporary files are no longer needed.
unlink (@mergedFiles);

if (!@mergedFiles)
  {
    print "Found no ROOT files to merge!\n";
    exit;
  }
# Print the summary: cross sections, effective luminosities and weights of
# the samples (only if at least one sample has a weight other than 1),
# followed by the job statistics if any Condor log was found.
print "=============================================\n";

# Samples are listed in sorted order so the report is reproducible.
my @samples = sort keys %weights;
my %shortName;
my $printOutput = 0;
foreach my $arg (@samples)
  {
    # Only the last path component of the argument is shown.
    $shortName{$arg} = $arg;
    $shortName{$arg} =~ s/^.*\/([^\/]*)$/$1/;
    $printOutput = 1 if $weights{$arg} != 1;
  }

# Sample names are passed as sprintf arguments rather than interpolated into
# the format, so a "%" in a name cannot corrupt the output.
my $output = "Cross-section of samples:\n";
foreach my $arg (@samples)
  {
    my $crossSection = defined $dirs{$arg} ? $crossSections{$dirs{$arg}} : undef;
    $crossSection = 0 if !defined $crossSection;    # unknown is shown as 0
    $output .= sprintf (" %s: %.5g pb\n", $shortName{$arg}, $crossSection);
  }
print $output if $printOutput;

$output = "Effective luminosities of samples:\n";
foreach my $arg (@samples)
  {
    next if $weights{$arg} == 1;
    if ($weights{$arg} == 0)
      {
        # A zero weight has no finite effective luminosity.
        $output .= sprintf (" %s: undefined (zero weight)\n", $shortName{$arg});
        next;
      }
    $output .= sprintf (" %s: %.5g/fb\n", $shortName{$arg}, $integratedLuminosity / (1000.0 * $weights{$arg}));
  }
print $output if $printOutput;

$output = sprintf ("Weights for target luminosity of %g/fb:\n", $integratedLuminosity / 1000.0);
foreach my $arg (@samples)
  {
    next if $weights{$arg} == 1;
    $output .= sprintf (" %s: %.5g\n", $shortName{$arg}, $weights{$arg});
  }
print $output if $printOutput;

my $goodEvents = countEvents ("$opt{'prefix'}.root", $cutFlow);
if ($counting)
  {
    printf "%s jobs ran successfully over %s (%.1f weighted) events.\n", $nGoodJobs, $nTotalEvents, $goodEvents;
    print "$nBadJobs jobs failed to run.\n";
    print "$nIncompleteJobs jobs have not finished.\n";
  }
print "=============================================\n";
219 ahart 1.1
# Expands a list of files and directories into a flat list of files.
#
# Takes a reference to an array of paths.  Directories are descended into
# recursively; every other existing path is returned as is.  Returns a
# reference to the array of files found.
#
# Trailing slashes are stripped from the elements of the caller's array in
# place.  Prints a message and exits the script if a path does not exist.
sub
processArgs
{
  my $argv = shift;

  my @files;
  foreach my $arg (@$argv)
    {
      # $arg aliases the caller's element, so this edits the caller's array.
      $arg =~ s/\/*$//;
      if (!(-e $arg))
        {
          print "$arg does not exist!\n";
          exit;
        }

      # Never follow the "." and ".." entries returned by readdir.
      next if ($arg =~ m/\/\.$/ || $arg =~ m/\/\.\.$/);

      if (!(-d $arg))
        {
          push (@files, $arg);
          next;
        }

      # A lexical handle keeps the recursion from sharing one global DIR.
      my $dirHandle;
      if (!opendir ($dirHandle, $arg))
        {
          print "WARNING: Could not read directory $arg: $!\n";
          next;
        }
      my @dirContents = map { "$arg/$_" } readdir ($dirHandle);
      closedir ($dirHandle);

      my $newFiles = processArgs (\@dirContents);
      push (@files, @$newFiles);
    }

  return \@files;
}
255    
# Prints the usage message to standard output and exits the script.
sub
printHelp
{
  # Show only the base name of the script in the usage line.
  my $exeName = $0;
  $exeName =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... -p PREFIX DIRECTORIES_AND_FILES\n",
        "Merges ROOT files containing histograms. If there are Condor logs in the\n",
        "specified directories, checks for nonzero return values. If the directories\n",
        "where created by \"osusub\", uses the cross section from the database to weight\n",
        "all histograms.\n",
        "\n",
        "Mandatory arguments to long options are mandatory for short options too.\n";

  # One row per output line: the option column and its description.
  # Continuation lines carry a blank option column.
  my @rows = (
    [" -c, --cutflow HISTOGRAM", "name of histogram to use for the cutflow (default:"],
    [" ", "cutFlow)"],
    [" -h, --help", "print this help message"],
    [" -l, --luminosity", "integrated luminosity to which the histograms are"],
    [" ", "weighted (default: 10000/pb)"],
    [" -p, --prefix PREFIX", "output is named PREFIX.root"],
    [" -w, --weight WEIGHT", "scale the output by WEIGHT, overriding the"],
    [" ", "automatic weighting using the cross section from"],
    [" ", "the database; WEIGHT may be a single number or a"],
    [" ", "comma-separated list, one for each input file"],
    [" -x, --xsection XSECTION", "use XSECTION to weight the histograms instead of"],
    [" ", "the value in the database"],
  );
  foreach my $row (@rows)
    {
      printf "%-29s%s\n", $row->[0], $row->[1];
    }

  exit;
}
284 ahart 1.3
# Extracts the ROOT file names listed in a run list file.
#
# Takes the path of a text file.  Every line containing "file:" followed by
# something ending in ".root" contributes one entry: the text between
# "file:" and ".root", with ".root" appended and no line ending.  Returns a
# reference to the array of names, which is empty if the file cannot be read
# (a warning is printed in that case).
sub
getRunList
{
  my $runListFile = shift;

  my @runList;
  my $runListHandle;
  if (!open ($runListHandle, "<", $runListFile))
    {
      print "WARNING: Could not read $runListFile: $!\n";
      return \@runList;
    }
  while (my $line = <$runListHandle>)
    {
      # Both .* are greedy: the last "file:" and the last ".root" on the
      # line delimit the name.
      next if !($line =~ m/^.*file:(.*)\.root.*/);
      push (@runList, "$1.root");
    }
  close ($runListHandle);

  return \@runList;
}
303    
# Returns the number of events recorded in a cutflow histogram.
#
# Runs "getEventsFromCutFlow FILE CUTFLOW" and returns whatever follows the
# last ": " of its output.  Returns -1 if the tool reports a missing
# histogram, an empty histogram or an unreadable file (its message is printed
# in that case), or if it produces no output at all.
sub
countEvents
{
  my $file = shift;
  my $cutFlow = shift;

  # Single-quote both arguments for the shell so that paths containing
  # spaces or shell metacharacters reach the tool unchanged.
  my @quoted;
  foreach my $word ($file, $cutFlow)
    {
      my $escaped = $word;
      $escaped =~ s/'/'\\''/g;
      push (@quoted, "'$escaped'");
    }

  my $output = `getEventsFromCutFlow $quoted[0] $quoted[1]`;
  $output = "" if !defined $output;
  if ($output =~ m/Did not find a histogram named/ || $output =~ m/appears to be empty/ || $output =~ m/Failed to open/)
    {
      print $output;
      return -1;
    }
  $output =~ s/^.*: (.*)$/$1/;
  $output =~ s/\n//g;

  # No output means the tool could not be run; that is a failure, not zero
  # events.
  return -1 if $output eq "";

  return $output;
}
321 ahart 1.27
# Tells whether a ROOT file is an EDM file and how many events it holds.
#
# Runs "edmFileUtil FILE" and inspects its combined standard output and
# standard error.  Returns
#   0   if the output says the file "appears to be missing" something or is
#       "not a ROOT file", or if no ", N events," summary is found,
#   N   the event count from the summary if it is nonzero,
#   -1  if the summary reports zero events, so that the result is still a
#       true value for an EDM file.
sub
isEDM
{
  my $file = shift;

  # Single-quote the path for the shell so that spaces and shell
  # metacharacters in it are passed through unchanged.
  my $quoted = $file;
  $quoted =~ s/'/'\\''/g;

  my $output = `edmFileUtil '$quoted' 2>&1`;
  $output = "" if !defined $output;
  return 0 if $output =~ m/appears to be missing/ || $output =~ m/not a ROOT file/;
  $output =~ s/[\f\n\r]//g;

  # Without an event summary (e.g. the tool is not installed) there is no
  # evidence that this is an EDM file.
  return 0 if !($output =~ m/^.*, ([^,]*) events,.*$/);

  my $nEvents = $1 + 0;
  return -1 if $nEvents == 0;
  return $nEvents;
}