ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists
(Generate patch)

Comparing UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists (file contents):
Revision 1.5 by ahart, Sun Sep 9 20:05:12 2012 UTC vs.
Revision 1.30 by ahart, Tue Jul 9 19:25:00 2013 UTC

# Line 8 | Line 8 | sub processArgs;
8   sub printHelp;
9   sub getRunList;
10   sub countEvents;
11 + sub isEDM;
12  
13   my %opt;
14   Getopt::Long::Configure ("bundling");
15 < GetOptions (\%opt, "luminosity|l=s", "prefix|p=s", "weight|w=s", "help|h");
15 > GetOptions (\%opt, "cutflow|c=s", "luminosity|l=s", "prefix|p=s", "weight|w=s", "xsection|x=s", "help|h");
16  
17   printHelp () if $opt{"help"} || !$opt{"prefix"};
18   my $files = processArgs (\@ARGV);
19 < my @rootFiles;
19 > my %rootFiles;
20 > my %nTotalEvents;
21 > my $nTotalEvents = 0;
22 > my %skimEventCounts;
23 > my %weights;
24   my @weights;
25   my $nGoodJobs = 0;
26   my $nBadJobs = 0;
27 + my $nIncompleteJobs = 0;
28   my $counting = 0;
29   my %exitCodes;
30 + my %signals;
31 + my %partial;
32   my %crossSections;
33 + my %dirs;
34 + my %channels;
35   my $integratedLuminosity = 10000;
36   $integratedLuminosity = $opt{"luminosity"} if $opt{"luminosity"};
37 + my $cutFlow = "cutFlow";
38 + $cutFlow = $opt{"cutflow"} if $opt{"cutflow"};
39   foreach my $file (@$files)
40    {
41      next if $file eq ".";
# Line 39 | Line 51 | foreach my $file (@$files)
51          close (FILE);
52          my $fileContents = join ("", @fileContents);
53          $fileContents =~ s/\n/ /g;
54 <        $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
55 <        $nGoodJobs++;
56 <        if ($fileContents != 0)
54 >        $counting = 1;
55 >        if ($fileContents =~ m/return value/)
56 >          {
57 >            $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
58 >            $nGoodJobs++;
59 >            print "WARNING: Nonzero exit code for job $jobNumber. (return value $fileContents)\n" if $fileContents != 0;
60 >            $exitCodes{$dir}{$jobNumber} = $fileContents;
61 >          }
62 >        elsif ($fileContents =~ m/signal/)
63            {
64 +            $fileContents =~ s/.*\(signal ([^)]*)\).*/$1/g;
65              $nBadJobs++;
66 <            print "Skipping job $jobNumber. (return value $fileContents)\n";
66 >            print "WARNING: Skipping job $jobNumber. (signal $fileContents)\n";
67 >            $signals{$dir}{$jobNumber} = $fileContents;
68 >          }
69 >        else
70 >          {
71 >            $nIncompleteJobs++;
72 >            $partial{$dir}{$jobNumber} = 1;
73            }
49        $exitCodes{$dir}{$jobNumber} = $fileContents;
50        $counting = 1;
74        }
75      if ($file =~ m/^.*\/crossSectionInPicobarn\.txt$/)
76        {
# Line 57 | Line 80 | foreach my $file (@$files)
80          $crossSections{$dir} = $crossSection;
81        }
82    }
60 my $nTotalEvents = 0;
83   foreach my $file (@$files)
84    {
85      next if $file eq ".";
# Line 65 | Line 87 | foreach my $file (@$files)
87      my $dir = $file;
88      $dir =~ s/^(.*)\/[^\/]*$/$1/;
89      my $badJob = 0;
90 <    if ($file =~ m/^.*_[^_]*\.root$/)
90 >    my $jobNumber;
91 >    my $fileIsEDM = 0;
92 >    $fileIsEDM = isEDM ($file) if $file =~ m/^.*\.root$/;
93 >    if ($fileIsEDM < 0)
94        {
95 <        my $jobNumber = $file;
95 >        unlink ($file);
96 >        next;
97 >      }
98 >    if ($file =~ m/^.*_[^_]*\.root$/ && !$fileIsEDM)
99 >      {
100 >        $jobNumber = $file;
101          $jobNumber =~ s/^.*_([^_]*)\.root$/$1/;
102 <        $badJob = defined $exitCodes{$dir} && defined $exitCodes{$dir}{$jobNumber} && $exitCodes{$dir}{$jobNumber};
102 >        $badJob = (defined $signals{$dir} && defined $signals{$dir}{$jobNumber}) || (defined $partial{$dir} && defined $partial{$dir}{$jobNumber});
103        }
104      next if $badJob;
105 <    if ($file =~ m/^.*\.root$/)
105 >    if ($file =~ m/^.*\.root$/ && !$fileIsEDM)
106        {
107 <        my $nEvents = countEvents ($file);
108 <        $nTotalEvents = -1 if $nTotalEvents < 0 || $nEvents < 0;
109 <        $nTotalEvents += $nEvents if !($nTotalEvents < 0 || $nEvents < 0);
107 >        foreach my $arg (@ARGV)
108 >          {
109 >            if (substr ($file, 0, length ($arg)) eq $arg)
110 >              {
111 >                $dirs{$arg} = $dir;
112 >                my $count = countEvents ($file, $cutFlow);
113 >                if ($count->{$cutFlow} < 0)
114 >                  {
115 >                    $nGoodJobs--;
116 >                    $nBadJobs++;
117 >                    print "WARNING: Skipping job $jobNumber. (bad ROOT file)\n";
118 >                    last;
119 >                  }
120 >                $nTotalEvents{$arg} += $count->{$cutFlow} if defined $nTotalEvents{$arg};
121 >                $nTotalEvents{$arg} = $count->{$cutFlow} if !(defined $nTotalEvents{$arg});
122 >                $nTotalEvents += $count->{$cutFlow};
123 >                push (@{$rootFiles{$arg}}, $file);
124 >                if ($opt{"weight"})
125 >                  {
126 >                    push (@weights, $opt{"weight"}) if !($opt{"weight"} =~ m/,/);
127 >                    push (@weights, 1.0) if $opt{"weight"} =~ m/,/;
128 >                  }
129 >                push (@weights, $opt{"xsection"} * $integratedLuminosity) if !$opt{"weight"} && $opt{"xsection"};
130 >                push (@weights, $crossSections{$dir} * $integratedLuminosity) if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dir};
131 >                push (@weights, 1.0) if !$opt{"weight"} && !$opt{"xsection"} && !(defined $crossSections{$dir});
132 >                $weights{$arg} = $weights[-1];
133 >
134 >                foreach my $channelCutFlow (keys %{$count})
135 >                  {
136 >                    my $channel = substr ($channelCutFlow, 0, length ($channelCutFlow) - length ($cutFlow));
137 >                    next if !length ($channel);
138 >                    $skimEventCounts{$arg} = {} if !(defined $skimEventCounts{$arg});
139 >                    $skimEventCounts{$arg}{$channel} += $count->{$channelCutFlow} if defined $skimEventCounts{$arg};
140 >                    $skimEventCounts{$arg}{$channel} = $count->{$channelCutFlow} if !(defined $skimEventCounts{$arg});
141 >                  }
142 >              }
143 >          }
144        }
145    }
146 < foreach my $file (@$files)
146 > if (!%rootFiles)
147    {
148 <    next if $file eq ".";
149 <    next if $file eq "..";
150 <    my $dir = $file;
151 <    $dir =~ s/^(.*)\/[^\/]*$/$1/;
152 <    my $badJob = 0;
153 <    if ($file =~ m/^.*_[^_]*\.root$/)
148 >    print "Found no ROOT files to merge!\n";
149 >    exit;
150 >  }
151 > my @mergedFiles;
152 > my @mergedWeights;
153 > foreach my $arg (@ARGV)
154 >  {
155 >    my $rootFiles = join (" ", @{$rootFiles{$arg}});
156 >    my $tmpName = "." . $arg . "_" . "$opt{'prefix'}.root";
157 >    $tmpName =~ s/\//_/g;
158 >    system ("mergeTFileServiceHistograms -i $rootFiles -o $tmpName");
159 >    my $count = $nTotalEvents{$arg};
160 >    system ("cutFlowLimits $tmpName");
161 >    if ($count)
162        {
163 <        my $jobNumber = $file;
164 <        $jobNumber =~ s/^.*_([^_]*)\.root$/$1/;
93 <        $badJob = defined $exitCodes{$dir} && defined $exitCodes{$dir}{$jobNumber} && $exitCodes{$dir}{$jobNumber};
163 >        $weights{$arg} /= $count if !$opt{"weight"} && $opt{"xsection"};
164 >        $weights{$arg} /= $count if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dirs{$arg}};
165        }
166 <    next if $badJob;
96 <    if ($file =~ m/^.*\.root$/)
166 >    else
167        {
168 <        push (@rootFiles, $file);
169 <        push (@weights, $opt{"weight"}) if $opt{"weight"};
170 <        push (@weights, ($crossSections{$dir} * $integratedLuminosity) / $nTotalEvents) if !$opt{"weight"} && defined $crossSections{$dir};
171 <        push (@weights, 1.0) if !$opt{"weight"} && !(defined $crossSections{$dir});
168 >        $weights{$arg} *= $count if !$opt{"weight"} && $opt{"xsection"};
169 >        $weights{$arg} *= $count if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dirs{$arg}};
170 >      }
171 >    push (@mergedFiles, $tmpName);
172 >    push (@mergedWeights, $weights{$arg});
173 >    foreach my $rootFile (@{$rootFiles{$arg}})
174 >      {
175 >        my $weight = $weights{$arg};
176 >        system ("weightTrees $rootFile $weight");
177 >      }
178 >    open (EVENT_COUNT, ">$dirs{$arg}/numberOfEvents.txt");
179 >    print EVENT_COUNT sprintf ("%.16g", $count) . "\n";
180 >    close (EVENT_COUNT);
181 >    foreach my $channel (keys %{$skimEventCounts{$arg}})
182 >      {
183 >        open (SKIM_COUNT, ">$dirs{$arg}/$channel/skimNumberOfEvents.txt");
184 >        print SKIM_COUNT sprintf ("%.16g", $skimEventCounts{$arg}{$channel}) . "\n";
185 >        close (SKIM_COUNT);
186        }
187    }
188 < if (!@rootFiles)
188 > my $mergedFiles = join (" ", @mergedFiles);
189 > my $mergedWeights = join (",", @mergedWeights);
190 > $mergedWeights = $opt{"weight"} if $opt{"weight"} && $opt{"weight"} =~ m/,/;
191 > system ("mergeTFileServiceHistograms -i $mergedFiles -o $opt{'prefix'}.root -w $mergedWeights");
192 > foreach my $mergedFile (@mergedFiles)
193    {
194 <    print "Found no ROOT files to merge!\n";
107 <    exit;
194 >    unlink ("$mergedFile");
195    }
196 < my $rootFiles = join (" ", @rootFiles);
197 < my $weights = join (",", @weights);
198 < system ("mergeTFileServiceHistograms -i $rootFiles -o $opt{'prefix'}.root -w $weights");
199 < system ("cutFlowTable $opt{'prefix'}.root cutFlow >& $opt{'prefix'}.tex");
200 < my $goodEvents = countEvents ("$opt{'prefix'}.root");
201 < print "$nGoodJobs jobs ran successfully over $nTotalEvents ($goodEvents weighted) events.\n" if $counting;
196 > print "=============================================\n";
197 > my $output = sprintf "Cross-section of samples:\n";
198 > my $printOutput = 0;
199 > foreach my $arg (keys %weights)
200 > {
201 >    my $shortArg = $arg;
202 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
203 >    $output = sprintf "%s  $shortArg: %.5g pb\n", $output, ($crossSections{$dirs{$arg}});
204 >    $printOutput = 1 if $weights{$arg} != 1;
205 > }
206 > print $output if $printOutput;
207 >
208 > my $output = sprintf "Effective luminosities of samples:\n";
209 > my $printOutput = 0;
210 > foreach my $arg (keys %weights)
211 >  {
212 >    my $shortArg = $arg;
213 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
214 >    $output = sprintf "%s  $shortArg: %.5g/fb\n", $output, ($integratedLuminosity / (1000.0 * $weights{$arg})) if $weights{$arg} != 1 && $weights{$arg} != 0;
215 >    $printOutput = 1 if $weights{$arg} != 1;
216 >  }
217 > print $output if $printOutput;
218 >
219 > $output = sprintf "Weights for target luminosity of %g/fb:\n", ($integratedLuminosity / 1000.0);
220 > $printOutput = 0;
221 > foreach my $arg (keys %weights)
222 >  {
223 >    my $shortArg = $arg;
224 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
225 >    $output = sprintf "%s  $shortArg: %.5g\n", $output, $weights{$arg} if $weights{$arg} != 1 && $weights{$arg} != 0;
226 >    $printOutput = 1 if $weights{$arg} != 1;
227 >  }
228 > print $output if $printOutput;
229 >
230 > my $goodEvents = countEvents ("$opt{'prefix'}.root", $cutFlow);
231 > printf "$nGoodJobs jobs ran successfully over %.1f (%.1f weighted) events.\n", $nTotalEvents, $goodEvents->{$cutFlow} if $counting;
232   print "$nBadJobs jobs failed to run.\n" if $counting;
233 + print "$nIncompleteJobs jobs have not finished.\n" if $counting;
234 + print "=============================================\n";
235  
236   sub
237   processArgs
# Line 156 | Line 275 | printHelp
275    my $exeName = $0;
276    $exeName =~ s/^.*\/([^\/]*)$/$1/;
277  
278 <  print "Usage: $exeName -p PREFIX [OPTION]... DIRECTORIES_AND_FILES\n";
279 <  print "Merges ROOT files containing histograms and produces a cutflow table in the\n";
280 <  print "form of a LaTeX document from the histogram named \"cutFlow\". If there are\n";
281 <  print "Condor logs in the specified directories, checks for nonzero return values. If\n";
282 <  print "the directories where created by \"osusub\", uses the cross section from the\n";
164 <  print "database to weight all histograms.\n";
278 >  print "Usage: $exeName [OPTION]... -p PREFIX DIRECTORIES_AND_FILES\n";
279 >  print "Merges ROOT files containing histograms. If there are Condor logs in the\n";
280 >  print "specified directories, checks for nonzero return values. If the directories\n";
281 >  print "where created by \"osusub\", uses the cross section from the database to weight\n";
282 >  print "all histograms.\n";
283    print "\n";
284    print "Mandatory arguments to long options are mandatory for short options too.\n";
285 +  printf "%-29s%s\n", "  -c, --cutflow HISTOGRAM", "name of histogram to use for the cutflow (default:";
286 +  printf "%-29s%s\n", "                  ", "cutFlow)";
287    printf "%-29s%s\n", "  -h, --help", "print this help message";
288    printf "%-29s%s\n", "  -l, --luminosity", "integrated luminosity to which the histograms are";
289    printf "%-29s%s\n", "                  ", "weighted (default: 10000/pb)";
290 <  printf "%-29s%s\n", "  -p, --prefix PREFIX", "output files are named PREFIX.root and PREFIX.tex";
290 >  printf "%-29s%s\n", "  -p, --prefix PREFIX", "output is  named PREFIX.root";
291    printf "%-29s%s\n", "  -w, --weight WEIGHT", "scale the output by WEIGHT, overriding the";
292    printf "%-29s%s\n", "                     ", "automatic weighting using the cross section from";
293 <  printf "%-29s%s\n", "                     ", "the database";
293 >  printf "%-29s%s\n", "                     ", "the database; WEIGHT may be a single number or a";
294 >  printf "%-29s%s\n", "                     ", "comma-separated list, one for each input file";
295 >  printf "%-29s%s\n", "  -x, --xsection XSECTION", "use XSECTION to weight the histograms instead of";
296 >  printf "%-29s%s\n", "                     ", "the value in the database";
297  
298    exit;
299   }
# Line 198 | Line 321 | sub
321   countEvents
322   {
323    my $file = shift;
324 +  my $cutFlow = shift;
325  
326 <  my $output = `getEventsFromCutFlow $file cutFlow`;
327 <  if ($output =~ m/Did not find a histogram named/)
326 >  my @output = `getEventsFromCutFlow $file $cutFlow`;
327 >  if ($output[0] =~ m/Did not find a histogram named/ || $output[0] =~ m/appears to be empty/ || $output[0] =~ m/Failed to open/)
328 >    {
329 >      print $output[0];
330 >      return {$cutFlow => -1};
331 >    }
332 >  my %weight;
333 >  my %count;
334 >  foreach my $line (@output)
335 >    {
336 >      my $cutFlowName = $line;
337 >      my $weight = $line;
338 >      my $count = $line;
339 >      $cutFlowName =~ s/(.*): .* \/ .*\n*/$1/;
340 >      $weight =~ s/.*: .* \/ (.*)\n*/$1/;
341 >      $count =~ s/.*: (.*) \/ .*\n*/$1/;
342 >      $count{$cutFlowName} = $count + 0;
343 >      $weight{$cutFlowName} = $weight + 0;
344 >    }
345 >  foreach my $cutFlowName (keys %count)
346      {
347 <      print $output;
348 <      return -1;
347 >      $count{$cutFlowName} *= $count{$cutFlow} / $weight{$cutFlowName} if $weight{$cutFlowName};
348 >      $count{$cutFlowName} *= $count{$cutFlow} * $weight{$cutFlowName} if !$weight{$cutFlowName};
349      }
208  $output =~ s/^.*: (.*)$/$1/;
209  $output =~ s/\n//g;
350  
351 +  return \%count;
352 + }
353 +
# Determines whether FILE is an EDM ROOT file by running "edmFileUtil" on it
# and inspecting the combined stdout/stderr output.
#
# Returns:
#    0  if edmFileUtil reports that the file "appears to be missing" or is
#       "not a ROOT file", or if edmFileUtil itself could not be executed
#   -1  if the event count taken from the output is zero (this includes
#       output from which no event count could be extracted); the main loop
#       unlinks files for which a negative value is returned
#   >0  the number of events extracted from the edmFileUtil output
sub
isEDM
{
  my $file = shift;

  # Single-quote the file name for the shell so that spaces and shell
  # metacharacters in the path are passed through literally.
  my $quotedFile = $file;
  $quotedFile =~ s/'/'\\''/g;
  my $output = `edmFileUtil '$quotedFile' 2>&1`;

  # If the command could not be run at all ($? of -1, or the shell's exit
  # statuses 126/127 for "cannot execute"/"command not found"), nothing is
  # known about the file. Report "not EDM" instead of -1, because a negative
  # return value causes the file to be deleted by the caller.
  my $exitStatus = $? >> 8;
  return 0 if !defined $output || $? == -1 || $exitStatus == 126 || $exitStatus == 127;

  return 0 if $output =~ m/appears to be missing/ || $output =~ m/not a ROOT file/;

  # Collapse the output to one line and keep only the text between the last
  # ", " and " events," (the event count); anything else numifies to 0.
  $output =~ s/[\f\n\r]//g;
  $output =~ s/^.*, ([^,]*) events,.*$/$1/;
  $output += 0;

  # A count of zero is mapped to -1 so that it is distinguishable from the
  # "not an EDM file" result of 0.
  $output-- if $output == 0;
  return $output;
}

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines