ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists
(Generate patch)

Comparing UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists (file contents):
Revision 1.1 by ahart, Thu Jun 14 17:42:30 2012 UTC vs.
Revision 1.26 by ahart, Tue Jun 11 11:02:00 2013 UTC

# Line 2 | Line 2
2  
3   use strict;
4   use Getopt::Long;
5 + use POSIX;
6 +
7 + sub processArgs;
8 + sub printHelp;
9 + sub getRunList;
10 + sub countEvents;
11  
12   my %opt;
13   Getopt::Long::Configure ("bundling");
14 < GetOptions (\%opt, "prefix|p=s", "help|h");
14 > GetOptions (\%opt, "cutflow|c=s", "luminosity|l=s", "prefix|p=s", "weight|w=s", "xsection|x=s", "help|h");
15  
16   printHelp () if $opt{"help"} || !$opt{"prefix"};
17   my $files = processArgs (\@ARGV);
18 < my @rootFiles;
19 < my $error = 0;
18 > my %rootFiles;
19 > my %weights;
20 > my @weights;
21 > my $nGoodJobs = 0;
22 > my $nBadJobs = 0;
23 > my $nIncompleteJobs = 0;
24 > my $counting = 0;
25 > my %exitCodes;
26 > my %signals;
27 > my %partial;
28 > my %crossSections;
29 > my %dirs;
30 > my $integratedLuminosity = 10000;
31 > $integratedLuminosity = $opt{"luminosity"} if $opt{"luminosity"};
32 > my $cutFlow = "cutFlow";
33 > $cutFlow = $opt{"cutflow"} if $opt{"cutflow"};
34 > foreach my $file (@$files)
35 >  {
36 >    next if $file eq ".";
37 >    next if $file eq "..";
38 >    my $dir = $file;
39 >    $dir =~ s/^(.*)\/[^\/]*$/$1/;
40 >    if ($file =~ m/^.*\/condor_[^_]*\.log$/)
41 >      {
42 >        my $jobNumber = $file;
43 >        $jobNumber =~ s/^.*\/condor_([^_]*)\.log$/$1/;
44 >        open (FILE, "<$file");
45 >        my @fileContents = <FILE>;
46 >        close (FILE);
47 >        my $fileContents = join ("", @fileContents);
48 >        $fileContents =~ s/\n/ /g;
49 >        $counting = 1;
50 >        if ($fileContents =~ m/return value/)
51 >          {
52 >            $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
53 >            $nGoodJobs++;
54 >            print "WARNING: Nonzero exit code for job $jobNumber. (return value $fileContents)\n" if $fileContents != 0;
55 >            $exitCodes{$dir}{$jobNumber} = $fileContents;
56 >          }
57 >        elsif ($fileContents =~ m/signal/)
58 >          {
59 >            $fileContents =~ s/.*\(signal ([^)]*)\).*/$1/g;
60 >            $nBadJobs++;
61 >            print "WARNING: Skipping job $jobNumber. (signal $fileContents)\n";
62 >            $signals{$dir}{$jobNumber} = $fileContents;
63 >          }
64 >        else
65 >          {
66 >            $nIncompleteJobs++;
67 >            $partial{$dir}{$jobNumber} = 1;
68 >          }
69 >      }
70 >    if ($file =~ m/^.*\/crossSectionInPicobarn\.txt$/)
71 >      {
72 >        open (CROSS_SECTION, "<$file");
73 >        my $crossSection = <CROSS_SECTION>;
74 >        close (CROSS_SECTION);
75 >        $crossSections{$dir} = $crossSection;
76 >      }
77 >  }
78   foreach my $file (@$files)
79    {
80      next if $file eq ".";
81      next if $file eq "..";
82 <    if ($file =~ m/\.root$/)
82 >    my $dir = $file;
83 >    $dir =~ s/^(.*)\/[^\/]*$/$1/;
84 >    my $badJob = 0;
85 >    my $jobNumber;
86 >    if ($file =~ m/^.*_[^_]*\.root$/)
87 >      {
88 >        $jobNumber = $file;
89 >        $jobNumber =~ s/^.*_([^_]*)\.root$/$1/;
90 >        $badJob = (defined $signals{$dir} && defined $signals{$dir}{$jobNumber}) || (defined $partial{$dir} && defined $partial{$dir}{$jobNumber});
91 >      }
92 >    next if $badJob;
93 >    if ($file =~ m/^.*\.root$/)
94        {
95 <        push (@rootFiles, $file);
96 <        next;
95 >        foreach my $arg (@ARGV)
96 >          {
97 >            if (substr ($file, 0, length ($arg)) eq $arg)
98 >              {
99 >                $dirs{$arg} = $dir;
100 >                if (countEvents ($file, $cutFlow) < 0)
101 >                  {
102 >                    $nGoodJobs--;
103 >                    $nBadJobs++;
104 >                    print "WARNING: Skipping job $jobNumber. (bad ROOT file)\n";
105 >                    last;
106 >                  }
107 >                push (@{$rootFiles{$arg}}, $file);
108 >                if ($opt{"weight"})
109 >                  {
110 >                    push (@weights, $opt{"weight"}) if !($opt{"weight"} =~ m/,/);
111 >                    push (@weights, 1.0) if $opt{"weight"} =~ m/,/;
112 >                  }
113 >                push (@weights, $opt{"xsection"} * $integratedLuminosity) if !$opt{"weight"} && $opt{"xsection"};
114 >                push (@weights, $crossSections{$dir} * $integratedLuminosity) if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dir};
115 >                push (@weights, 1.0) if !$opt{"weight"} && !$opt{"xsection"} && !(defined $crossSections{$dir});
116 >                $weights{$arg} = $weights[-1];
117 >              }
118 >          }
119        }
120 <    open (FILE, "<$file");
121 <    my @fileContents = <FILE>;
122 <    close (FILE);
123 <    my $fileContents = join ("", @fileContents);
124 <    next if !($fileContents =~ m/\(return value [^)]*\)/);
125 <    $fileContents =~ s/\n/ /g;
126 <    $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
127 <    if ($fileContents != 0)
128 <      {
129 <        printf "Error: return value of %2d in \"$file\"!\n", $fileContents;
130 <        $error = 1;
131 <      }
132 <  }
133 < exit if $error;
134 < my $rootFiles = join (" ", @rootFiles);
135 < system ("mergeTFileServiceHistograms -i $rootFiles -o $opt{'prefix'}.root");
136 < system ("cutFlowTable $opt{'prefix'}.root cutFlow >& $opt{'prefix'}.tex");
120 >  }
121 > if (!%rootFiles)
122 >  {
123 >    print "Found no ROOT files to merge!\n";
124 >    exit;
125 >  }
126 > my %nTotalEvents;
127 > my $nTotalEvents = 0;
128 > my @mergedFiles;
129 > my @mergedWeights;
130 > foreach my $arg (@ARGV)
131 >  {
132 >    my $rootFiles = join (" ", @{$rootFiles{$arg}});
133 >    my $tmpName = $arg . "_" . "$opt{'prefix'}.root";
134 >    $tmpName =~ s/\//_/g;
135 >    system ("mergeTFileServiceHistograms -i $rootFiles -o $tmpName");
136 >    my $count = countEvents ($tmpName, $cutFlow);
137 >    system ("cutFlowLimits $tmpName");
138 >    $nTotalEvents{$arg} = $count;
139 >    $nTotalEvents += $count;
140 >    $weights{$arg} /= $count if !$opt{"weight"} && $opt{"xsection"};
141 >    $weights{$arg} /= $count if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dirs{$arg}};
142 >    push (@mergedFiles, $tmpName);
143 >    push (@mergedWeights, $weights{$arg});
144 >    foreach my $rootFile (@{$rootFiles{$arg}})
145 >      {
146 >        my $weight = $weights{$arg};
147 >        system ("weightTrees $rootFile $weight");
148 >      }
149 >    if (defined $crossSections{$dirs{$arg}})
150 >      {
151 >        open (EVENT_COUNT, ">$dirs{$arg}/numberOfEvents.txt");
152 >        print EVENT_COUNT sprintf ("%.0f", $count) . "\n";
153 >        close (EVENT_COUNT);
154 >      }
155 >  }
156 > my $mergedFiles = join (" ", @mergedFiles);
157 > my $mergedWeights = join (",", @mergedWeights);
158 > $mergedWeights = $opt{"weight"} if $opt{"weight"} && $opt{"weight"} =~ m/,/;
159 > system ("mergeTFileServiceHistograms -i $mergedFiles -o $opt{'prefix'}.root -w $mergedWeights");
160 > foreach my $mergedFile (@mergedFiles)
161 >  {
162 >    unlink ("$mergedFile");
163 >  }
164 > print "=============================================\n";
165 > my $output = sprintf "Cross-section of samples:\n";
166 > my $printOutput = 0;
167 > foreach my $arg (keys %weights)
168 > {
169 >    my $shortArg = $arg;
170 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
171 >    $output = sprintf "%s  $shortArg: %.5g pb\n", $output, ($crossSections{$dirs{$arg}});
172 >    $printOutput = 1 if $weights{$arg} != 1;
173 > }
174 > print $output if $printOutput;
175 >
176 > my $output = sprintf "Effective luminosities of samples:\n";
177 > my $printOutput = 0;
178 > foreach my $arg (keys %weights)
179 >  {
180 >    my $shortArg = $arg;
181 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
182 >    $output = sprintf "%s  $shortArg: %.5g/fb\n", $output, ($integratedLuminosity / (1000.0 * $weights{$arg})) if $weights{$arg} != 1;
183 >    $printOutput = 1 if $weights{$arg} != 1;
184 >  }
185 > print $output if $printOutput;
186 >
187 > $output = sprintf "Weights for target luminosity of %g/fb:\n", ($integratedLuminosity / 1000.0);
188 > $printOutput = 0;
189 > foreach my $arg (keys %weights)
190 >  {
191 >    my $shortArg = $arg;
192 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
193 >    $output = sprintf "%s  $shortArg: %.5g\n", $output, $weights{$arg} if $weights{$arg} != 1;
194 >    $printOutput = 1 if $weights{$arg} != 1;
195 >  }
196 > print $output if $printOutput;
197 >
198 > my $goodEvents = countEvents ("$opt{'prefix'}.root", $cutFlow);
199 > printf "$nGoodJobs jobs ran successfully over $nTotalEvents (%.1f weighted) events.\n", $goodEvents if $counting;
200 > print "$nBadJobs jobs failed to run.\n" if $counting;
201 > print "$nIncompleteJobs jobs have not finished.\n" if $counting;
202 > print "=============================================\n";
203  
204   sub
205   processArgs
# Line 80 | Line 243 | printHelp
243    my $exeName = $0;
244    $exeName =~ s/^.*\/([^\/]*)$/$1/;
245  
246 <  print "Usage: $exeName [OPTIONS] -p PREFIX DIRECTORIES_AND_FILES\n";
247 <  print "Merges ROOT files containing histograms and produces a cutflow table in the\n";
248 <  print "form of a LaTeX document from the histogram named \"cutFlow\". If there are\n";
249 <  print "Condor logs in the specified directories, checks for nonzero return values.\n";
246 >  print "Usage: $exeName [OPTION]... -p PREFIX DIRECTORIES_AND_FILES\n";
247 >  print "Merges ROOT files containing histograms. If there are Condor logs in the\n";
248 >  print "specified directories, checks for nonzero return values. If the directories\n";
249 >  print "where created by \"osusub\", uses the cross section from the database to weight\n";
250 >  print "all histograms.\n";
251    print "\n";
252    print "Mandatory arguments to long options are mandatory for short options too.\n";
253 +  printf "%-29s%s\n", "  -c, --cutflow HISTOGRAM", "name of histogram to use for the cutflow (default:";
254 +  printf "%-29s%s\n", "                  ", "cutFlow)";
255    printf "%-29s%s\n", "  -h, --help", "print this help message";
256 <  printf "%-29s%s\n", "  -p, --prefix PREFIX", "output files are named PREFIX.root and PREFIX.tex";
256 >  printf "%-29s%s\n", "  -l, --luminosity", "integrated luminosity to which the histograms are";
257 >  printf "%-29s%s\n", "                  ", "weighted (default: 10000/pb)";
258 >  printf "%-29s%s\n", "  -p, --prefix PREFIX", "output is  named PREFIX.root";
259 >  printf "%-29s%s\n", "  -w, --weight WEIGHT", "scale the output by WEIGHT, overriding the";
260 >  printf "%-29s%s\n", "                     ", "automatic weighting using the cross section from";
261 >  printf "%-29s%s\n", "                     ", "the database; WEIGHT may be a single number or a";
262 >  printf "%-29s%s\n", "                     ", "comma-separated list, one for each input file";
263 >  printf "%-29s%s\n", "  -x, --xsection XSECTION", "use XSECTION to weight the histograms instead of";
264 >  printf "%-29s%s\n", "                     ", "the value in the database";
265  
266    exit;
267   }
268 +
269 + sub
270 + getRunList
271 + {
272 +  my $runListFile = shift;
273 +
274 +  open (RUN_LIST, "<$runListFile");
275 +  my @runList0 = <RUN_LIST>;
276 +  close (RUN_LIST);
277 +  my @runList;
278 +  foreach my $file (@runList0)
279 +    {
280 +      next if !($file =~ m/^.*file:.*\.root.*/);
281 +      $file =~ s/.*file:(.*)\.root.*/$1.root/;
282 +      push (@runList, $file);
283 +    }
284 +
285 +  return \@runList;
286 + }
287 +
288 + sub
289 + countEvents
290 + {
291 +  my $file = shift;
292 +  my $cutFlow = shift;
293 +
294 +  my $output = `getEventsFromCutFlow $file $cutFlow`;
295 +  if ($output =~ m/Did not find a histogram named/ || $output =~ m/appears to be empty/ || $output =~ m/Failed to open/)
296 +    {
297 +      print $output;
298 +      return -1;
299 +    }
300 +  $output =~ s/^.*: (.*)$/$1/;
301 +  $output =~ s/\n//g;
302 +
303 +  return $output;
304 + }

Diff Legend

- Removed lines
+ Added lines
< Changed lines (text in the older revision)
> Changed lines (text in the newer revision)