ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists
(Generate patch)

Comparing UserCode/OSUT3Analysis/AnaTools/scripts/mergeHists (file contents):
Revision 1.3 by ahart, Tue Aug 28 12:27:57 2012 UTC vs.
Revision 1.27 by ahart, Tue Jun 18 02:00:39 2013 UTC

# Line 8 | Line 8 | sub processArgs;
8   sub printHelp;
9   sub getRunList;
10   sub countEvents;
11 + sub isEDM;
12  
13   my %opt;
14   Getopt::Long::Configure ("bundling");
15 < GetOptions (\%opt, "prefix|p=s", "weight|w=s", "help|h");
15 > GetOptions (\%opt, "cutflow|c=s", "luminosity|l=s", "prefix|p=s", "weight|w=s", "xsection|x=s", "help|h");
16  
17   printHelp () if $opt{"help"} || !$opt{"prefix"};
18   my $files = processArgs (\@ARGV);
19 < my @rootFiles;
19 > my %rootFiles;
20 > my %weights;
21   my @weights;
22 < my @goodJobs;
23 < my @badJobs;
24 < my %runList;
22 > my $nGoodJobs = 0;
23 > my $nBadJobs = 0;
24 > my $nIncompleteJobs = 0;
25   my $counting = 0;
26 + my %exitCodes;
27 + my %signals;
28 + my %partial;
29 + my %crossSections;
30 + my %dirs;
31 + my $integratedLuminosity = 10000;
32 + $integratedLuminosity = $opt{"luminosity"} if $opt{"luminosity"};
33 + my $cutFlow = "cutFlow";
34 + $cutFlow = $opt{"cutflow"} if $opt{"cutflow"};
35   foreach my $file (@$files)
36    {
37      next if $file eq ".";
38      next if $file eq "..";
39 <    if ($file =~ m/^.*\/condor_.*\.log$/)
39 >    my $dir = $file;
40 >    $dir =~ s/^(.*)\/[^\/]*$/$1/;
41 >    if ($file =~ m/^.*\/condor_[^_]*\.log$/)
42        {
43          my $jobNumber = $file;
44          $jobNumber =~ s/^.*\/condor_([^_]*)\.log$/$1/;
# Line 34 | Line 47 | foreach my $file (@$files)
47          close (FILE);
48          my $fileContents = join ("", @fileContents);
49          $fileContents =~ s/\n/ /g;
50 <        $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
51 <        push (@goodJobs, $jobNumber) if $fileContents == 0;
52 <        if ($fileContents != 0)
50 >        $counting = 1;
51 >        if ($fileContents =~ m/return value/)
52 >          {
53 >            $fileContents =~ s/.*\(return value ([^)]*)\).*/$1/g;
54 >            $nGoodJobs++;
55 >            print "WARNING: Nonzero exit code for job $jobNumber. (return value $fileContents)\n" if $fileContents != 0;
56 >            $exitCodes{$dir}{$jobNumber} = $fileContents;
57 >          }
58 >        elsif ($fileContents =~ m/signal/)
59 >          {
60 >            $fileContents =~ s/.*\(signal ([^)]*)\).*/$1/g;
61 >            $nBadJobs++;
62 >            print "WARNING: Skipping job $jobNumber. (signal $fileContents)\n";
63 >            $signals{$dir}{$jobNumber} = $fileContents;
64 >          }
65 >        else
66            {
67 <            push (@badJobs, $jobNumber);
68 <            print "Skipping job $jobNumber. (return value $fileContents)\n";
67 >            $nIncompleteJobs++;
68 >            $partial{$dir}{$jobNumber} = 1;
69            }
70        }
71 <    if ($file =~ m/^.*\/runList\.py$/)
71 >    if ($file =~ m/^.*\/crossSectionInPicobarn\.txt$/)
72        {
73 <        my $directory = $file;
74 <        $directory =~ s/^(.*)\/runList\.py$/$1/;
75 <        $runList{$directory} = getRunList ($file);
76 <        $counting = 1;
73 >        open (CROSS_SECTION, "<$file");
74 >        my $crossSection = <CROSS_SECTION>;
75 >        close (CROSS_SECTION);
76 >        $crossSections{$dir} = $crossSection;
77        }
78    }
53 my $nJobs = @goodJobs;
54 $nJobs += @badJobs;
55 my $goodEvents = 0;
56 my $goodLumis = 0;
57 my $goodRuns = 0;
58 my $badEvents = 0;
59 my $badLumis = 0;
60 my $badRuns = 0;
79   foreach my $file (@$files)
80    {
81      next if $file eq ".";
82      next if $file eq "..";
83 +    my $dir = $file;
84 +    $dir =~ s/^(.*)\/[^\/]*$/$1/;
85      my $badJob = 0;
86 <    if ($file =~ m/^.*_[^_]*\.root$/)
86 >    my $jobNumber;
87 >    my $fileIsEDM = 0;
88 >    $fileIsEDM = isEDM ($file) if $file =~ m/^.*\.root$/;
89 >    if ($file =~ m/^.*_[^_]*\.root$/ && !$fileIsEDM)
90        {
91 <        my $jobNumber = $file;
69 <        my $directory = $file;
91 >        $jobNumber = $file;
92          $jobNumber =~ s/^.*_([^_]*)\.root$/$1/;
93 <        $directory =~ s/^(.*)\/[^\/]*\.root$/$1/;
72 <        foreach my $badJobNumber (@badJobs)
73 <          {
74 <            $badJob = 1 if $jobNumber = $badJobNumber;
75 <          }
76 <        countEvents ($runList{$directory}, $jobNumber, $nJobs, \$goodEvents, \$goodLumis, \$goodRuns) if !$badJob && defined $runList{$directory};
77 <        countEvents ($runList{$directory}, $jobNumber, $nJobs, \$badEvents, \$badLumis, \$badRuns) if $badJob && defined $runList{$directory};
93 >        $badJob = (defined $signals{$dir} && defined $signals{$dir}{$jobNumber}) || (defined $partial{$dir} && defined $partial{$dir}{$jobNumber});
94        }
95      next if $badJob;
96 <    if ($file =~ m/^.*\.root$/)
96 >    if ($file =~ m/^.*\.root$/ && !$fileIsEDM)
97        {
98 <        push (@rootFiles, $file);
99 <        push (@weights, $opt{"weight"}) if $opt{"weight"};
100 <        push (@weights, 1.0) if !$opt{"weight"};
98 >        foreach my $arg (@ARGV)
99 >          {
100 >            if (substr ($file, 0, length ($arg)) eq $arg)
101 >              {
102 >                $dirs{$arg} = $dir;
103 >                if (countEvents ($file, $cutFlow) < 0)
104 >                  {
105 >                    $nGoodJobs--;
106 >                    $nBadJobs++;
107 >                    print "WARNING: Skipping job $jobNumber. (bad ROOT file)\n";
108 >                    last;
109 >                  }
110 >                push (@{$rootFiles{$arg}}, $file);
111 >                if ($opt{"weight"})
112 >                  {
113 >                    push (@weights, $opt{"weight"}) if !($opt{"weight"} =~ m/,/);
114 >                    push (@weights, 1.0) if $opt{"weight"} =~ m/,/;
115 >                  }
116 >                push (@weights, $opt{"xsection"} * $integratedLuminosity) if !$opt{"weight"} && $opt{"xsection"};
117 >                push (@weights, $crossSections{$dir} * $integratedLuminosity) if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dir};
118 >                push (@weights, 1.0) if !$opt{"weight"} && !$opt{"xsection"} && !(defined $crossSections{$dir});
119 >                $weights{$arg} = $weights[-1];
120 >              }
121 >          }
122        }
123    }
124 < my $nGoodJobs = @goodJobs;
88 < my $nBadJobs = @badJobs;
89 < print "$nGoodJobs ran successfully over $goodEvents events, $goodLumis lumis, $goodRuns runs\n" if $counting;
90 < print "$nBadJobs failed to run over $badEvents events, $badLumis lumis, $badRuns runs\n" if $counting;
91 < if (!@rootFiles)
124 > if (!%rootFiles)
125    {
126      print "Found no ROOT files to merge!\n";
127      exit;
128    }
129 < my $rootFiles = join (" ", @rootFiles);
130 < my $weights = join (",", @weights);
131 < system ("mergeTFileServiceHistograms -i $rootFiles -o $opt{'prefix'}.root -w $weights");
132 < system ("cutFlowTable $opt{'prefix'}.root cutFlow >& $opt{'prefix'}.tex");
129 > my %nTotalEvents;
130 > my $nTotalEvents = 0;
131 > my @mergedFiles;
132 > my @mergedWeights;
133 > foreach my $arg (@ARGV)
134 >  {
135 >    my $rootFiles = join (" ", @{$rootFiles{$arg}});
136 >    my $tmpName = $arg . "_" . "$opt{'prefix'}.root";
137 >    $tmpName =~ s/\//_/g;
138 >    system ("mergeTFileServiceHistograms -i $rootFiles -o $tmpName");
139 >    my $count = countEvents ($tmpName, $cutFlow);
140 >    system ("cutFlowLimits $tmpName");
141 >    $nTotalEvents{$arg} = $count;
142 >    $nTotalEvents += $count;
143 >    $weights{$arg} /= $count if !$opt{"weight"} && $opt{"xsection"};
144 >    $weights{$arg} /= $count if !$opt{"weight"} && !$opt{"xsection"} && defined $crossSections{$dirs{$arg}};
145 >    push (@mergedFiles, $tmpName);
146 >    push (@mergedWeights, $weights{$arg});
147 >    foreach my $rootFile (@{$rootFiles{$arg}})
148 >      {
149 >        my $weight = $weights{$arg};
150 >        system ("weightTrees $rootFile $weight");
151 >      }
152 >    if (defined $crossSections{$dirs{$arg}})
153 >      {
154 >        open (EVENT_COUNT, ">$dirs{$arg}/numberOfEvents.txt");
155 >        print EVENT_COUNT sprintf ("%.0f", $count) . "\n";
156 >        close (EVENT_COUNT);
157 >      }
158 >  }
159 > my $mergedFiles = join (" ", @mergedFiles);
160 > my $mergedWeights = join (",", @mergedWeights);
161 > $mergedWeights = $opt{"weight"} if $opt{"weight"} && $opt{"weight"} =~ m/,/;
162 > system ("mergeTFileServiceHistograms -i $mergedFiles -o $opt{'prefix'}.root -w $mergedWeights");
163 > foreach my $mergedFile (@mergedFiles)
164 >  {
165 >    unlink ("$mergedFile");
166 >  }
167 > print "=============================================\n";
168 > my $output = sprintf "Cross-section of samples:\n";
169 > my $printOutput = 0;
170 > foreach my $arg (keys %weights)
171 > {
172 >    my $shortArg = $arg;
173 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
174 >    $output = sprintf "%s  $shortArg: %.5g pb\n", $output, ($crossSections{$dirs{$arg}});
175 >    $printOutput = 1 if $weights{$arg} != 1;
176 > }
177 > print $output if $printOutput;
178 >
179 > my $output = sprintf "Effective luminosities of samples:\n";
180 > my $printOutput = 0;
181 > foreach my $arg (keys %weights)
182 >  {
183 >    my $shortArg = $arg;
184 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
185 >    $output = sprintf "%s  $shortArg: %.5g/fb\n", $output, ($integratedLuminosity / (1000.0 * $weights{$arg})) if $weights{$arg} != 1;
186 >    $printOutput = 1 if $weights{$arg} != 1;
187 >  }
188 > print $output if $printOutput;
189 >
190 > $output = sprintf "Weights for target luminosity of %g/fb:\n", ($integratedLuminosity / 1000.0);
191 > $printOutput = 0;
192 > foreach my $arg (keys %weights)
193 >  {
194 >    my $shortArg = $arg;
195 >    $shortArg =~ s/^.*\/([^\/]*)$/$1/;
196 >    $output = sprintf "%s  $shortArg: %.5g\n", $output, $weights{$arg} if $weights{$arg} != 1;
197 >    $printOutput = 1 if $weights{$arg} != 1;
198 >  }
199 > print $output if $printOutput;
200 >
201 > my $goodEvents = countEvents ("$opt{'prefix'}.root", $cutFlow);
202 > printf "$nGoodJobs jobs ran successfully over $nTotalEvents (%.1f weighted) events.\n", $goodEvents if $counting;
203 > print "$nBadJobs jobs failed to run.\n" if $counting;
204 > print "$nIncompleteJobs jobs have not finished.\n" if $counting;
205 > print "=============================================\n";
206  
207   sub
208   processArgs
# Line 140 | Line 246 | printHelp
246    my $exeName = $0;
247    $exeName =~ s/^.*\/([^\/]*)$/$1/;
248  
249 <  print "Usage: $exeName -p PREFIX [OPTION]... DIRECTORIES_AND_FILES\n";
250 <  print "Merges ROOT files containing histograms and produces a cutflow table in the\n";
251 <  print "form of a LaTeX document from the histogram named \"cutFlow\". If there are\n";
252 <  print "Condor logs in the specified directories, checks for nonzero return values.\n";
249 >  print "Usage: $exeName [OPTION]... -p PREFIX DIRECTORIES_AND_FILES\n";
250 >  print "Merges ROOT files containing histograms. If there are Condor logs in the\n";
251 >  print "specified directories, checks for nonzero return values. If the directories\n";
252 >  print "where created by \"osusub\", uses the cross section from the database to weight\n";
253 >  print "all histograms.\n";
254    print "\n";
255    print "Mandatory arguments to long options are mandatory for short options too.\n";
256 +  printf "%-29s%s\n", "  -c, --cutflow HISTOGRAM", "name of histogram to use for the cutflow (default:";
257 +  printf "%-29s%s\n", "                  ", "cutFlow)";
258    printf "%-29s%s\n", "  -h, --help", "print this help message";
259 <  printf "%-29s%s\n", "  -p, --prefix PREFIX", "output files are named PREFIX.root and PREFIX.tex";
260 <  printf "%-29s%s\n", "  -w, --weight WEIGHT", "scale the output by WEIGHT";
259 >  printf "%-29s%s\n", "  -l, --luminosity", "integrated luminosity to which the histograms are";
260 >  printf "%-29s%s\n", "                  ", "weighted (default: 10000/pb)";
261 >  printf "%-29s%s\n", "  -p, --prefix PREFIX", "output is  named PREFIX.root";
262 >  printf "%-29s%s\n", "  -w, --weight WEIGHT", "scale the output by WEIGHT, overriding the";
263 >  printf "%-29s%s\n", "                     ", "automatic weighting using the cross section from";
264 >  printf "%-29s%s\n", "                     ", "the database; WEIGHT may be a single number or a";
265 >  printf "%-29s%s\n", "                     ", "comma-separated list, one for each input file";
266 >  printf "%-29s%s\n", "  -x, --xsection XSECTION", "use XSECTION to weight the histograms instead of";
267 >  printf "%-29s%s\n", "                     ", "the value in the database";
268  
269    exit;
270   }
# Line 175 | Line 291 | getRunList
291   sub
292   countEvents
293   {
294 <  my $runList = shift;
295 <  my $jobNumber = shift;
296 <  my $nJobs = shift;
297 <  my $events = shift;
298 <  my $lumis = shift;
183 <  my $runs = shift;
184 <
185 <  my $filesPerJob = ceil (@$runList / $nJobs);
186 <  print "Job $jobNumber ran successfully on $filesPerJob files.\n";
187 <  my @subRunList = @$runList[($jobNumber * $filesPerJob)..($jobNumber * $filesPerJob + $filesPerJob - 1)];
188 <  foreach my $file (@subRunList)
294 >  my $file = shift;
295 >  my $cutFlow = shift;
296 >
297 >  my $output = `getEventsFromCutFlow $file $cutFlow`;
298 >  if ($output =~ m/Did not find a histogram named/ || $output =~ m/appears to be empty/ || $output =~ m/Failed to open/)
299      {
300 <      my @output = `edmFileUtil $file`;
301 <      my $output = join (" ", @output);
192 <      $output =~ s/\n//g;
193 <      my $eventsStr = $output;
194 <      my $lumisStr = $output;
195 <      my $runsStr = $output;
196 <      $eventsStr =~ s/^.*\([^ ]* runs, [^ ]* lumis, ([^ ]*) events, [^ ]* bytes\).*$/$1/;
197 <      $lumisStr =~ s/^.*\([^ ]* runs, ([^ ]*) lumis, [^ ]* events, [^ ]* bytes\).*$/$1/;
198 <      $runsStr =~ s/^.*\(([^ ]*) runs, [^ ]* lumis, [^ ]* events, [^ ]* bytes\).*$/$1/;
199 <      $$events += $eventsStr;
200 <      $$lumis += $lumisStr;
201 <      $$runs += $runsStr;
300 >      print $output;
301 >      return -1;
302      }
303 +  $output =~ s/^.*: (.*)$/$1/;
304 +  $output =~ s/\n//g;
305 +
306 +  return $output;
307 + }
308 +
309 + sub
310 + isEDM
311 + {
312 +  my $file = shift;
313 +
314 +  my $output = `edmFileUtil $file 2>&1`;
315 +  return 0 if $output =~ m/appears to be missing/ || $output =~ m/not a ROOT file/;
316 +  return 1;
317   }

Diff Legend

Removed lines
+ Added lines
< Changed lines (text as it appears in the older revision, 1.3)
> Changed lines (text as it appears in the newer revision, 1.27)