2 |
|
|
3 |
|
use strict; |
4 |
|
use Getopt::Long; |
5 |
+ |
use Mysql; |
6 |
+ |
|
7 |
+ |
sub printHelp; |
8 |
+ |
sub parseCrabDir; |
9 |
+ |
|
10 |
+ |
our $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped"; |
11 |
|
|
12 |
|
my %opt; |
13 |
|
Getopt::Long::Configure ("bundling"); |
14 |
< |
GetOptions (\%opt, "force|f", "help|h"); |
14 |
> |
GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h"); |
15 |
> |
|
16 |
> |
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]); |
17 |
> |
my $crabDir = $ARGV[0]; |
18 |
> |
my $outputDir = $ARGV[0]; |
19 |
> |
my $dataset = $ARGV[0]; |
20 |
> |
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"}; |
21 |
|
|
22 |
< |
printHelp () if $opt{"help"} || !$ARGV[0]; |
23 |
< |
opendir (CRAB_OUTPUT, "$ARGV[0]"); |
22 |
> |
# Dataset mode (-s): resolve the CRAB working directory and the output
# directory from the "ntuple" database instead of taking them from the
# command line.  $dataset is used as a SQL LIKE pattern, so it may match
# zero, one or several entries:
#   - no match:        report it and quit;
#   - exactly one:     use it directly;
#   - several matches: list them (oldest first) and ask the user to pick one.
# On success $crabDir is set to "<workingDirectory>/ntuple" and $outputDir to
# the entry's location.
if ($opt{"dataset"})
{
  $db->selectdb ("ntuple");
  # Quote the user-supplied pattern rather than pasting it between literal
  # quotes, so a dataset name containing a quote cannot break the statement.
  my $query = "select workingDirectory,location,dataset,user,creationTime from ntuple where dataset like " . $db->quote ($dataset) . " order by creationTime";
  my $results = $db->query ($query) or die "Failed to query Tier 3 database, stopped";
  my $numRows = $results->numrows ();

  if ($numRows == 0)
    {
      print "Database entry does not exist.\n";
      exit;
    }
  elsif ($numRows == 1)
    {
      # Unambiguous match: no need to prompt.
      my @row = $results->fetchrow ();
      $crabDir = "$row[0]/ntuple";
      $outputDir = $row[1];
    }
  else
    {
      # Remember each candidate under the menu number shown to the user.
      my %workingDir;
      my %location;
      print "Found multiple database entries matching\n";
      print "\"$dataset\":\n";
      for (my $i = 1; $i <= $numRows; $i++)
        {
          my @row = $results->fetchrow ();
          $workingDir{"$i"} = $row[0];
          $location{"$i"} = $row[1];
          printf "(%2d) $row[2]\n", $i;
          print " created by $row[3] on $row[4]\n";
        }
      print "\nWhich entry would you like to use?: ";
      my $response = <STDIN>;
      # End of input (e.g. closed STDIN) is treated as an empty, invalid answer.
      $response = "" if !defined $response;
      $response =~ s/[ \t\n]//g;
      if (!(exists $workingDir{$response}))
        {
          print "Your selection \"$response\" was not a valid option! Quitting.\n";
          exit;
        }
      $crabDir = "$workingDir{$response}/ntuple";
      $outputDir = $location{$response};
    }
}
61 |
> |
|
62 |
> |
# Make sure the output directory is really a directory before listing it.
# (-d is false for a path that does not exist, so no separate -e test is
# needed.)
if (!(-d $outputDir))
{
  print "Output directory $outputDir does not exist!\n";
  exit;
}
# Read the full directory listing once; it is used both to decide which files
# to keep and to find the files to remove.  Failing to open the directory
# (e.g. for lack of permission) must not be mistaken for an empty directory.
opendir (my $outputHandle, "$outputDir") or die "Failed to open $outputDir: $!, stopped";
my @crabOutput = readdir ($outputHandle);
closedir ($outputHandle);
70 |
|
my %filesToKeep; |
71 |
< |
foreach my $file (@crabOutput) |
71 |
> |
if (!$opt{"dumb"}) |
72 |
|
{ |
73 |
< |
next if ($file eq "." || $file eq ".."); |
18 |
< |
next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/); |
19 |
< |
my $jobNumber = $file; |
20 |
< |
my $submissionNumber = $file; |
21 |
< |
$jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/; |
22 |
< |
$submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/; |
23 |
< |
if (!(defined $filesToKeep{$jobNumber}) |
24 |
< |
|| $filesToKeep{$jobNumber} < $submissionNumber) |
73 |
> |
if (!(-e $crabDir) || !(-d $crabDir)) |
74 |
|
{ |
75 |
< |
$filesToKeep{$jobNumber} = $submissionNumber; |
75 |
> |
print "CRAB directory $crabDir does not exist!\n"; |
76 |
> |
exit; |
77 |
> |
} |
78 |
> |
parseCrabDir ($crabDir, \%filesToKeep); |
79 |
> |
} |
80 |
> |
else |
81 |
> |
{ |
82 |
> |
foreach my $file (@crabOutput) |
83 |
> |
{ |
84 |
> |
next if ($file eq "." || $file eq ".."); |
85 |
> |
next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/); |
86 |
> |
my $jobNumber = $file; |
87 |
> |
my $submissionNumber = $file; |
88 |
> |
$jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/; |
89 |
> |
$submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/; |
90 |
> |
if (!(defined $filesToKeep{$jobNumber}) |
91 |
> |
|| $filesToKeep{$jobNumber} < $submissionNumber) |
92 |
> |
{ |
93 |
> |
$filesToKeep{$jobNumber} = $submissionNumber; |
94 |
> |
} |
95 |
|
} |
96 |
|
} |
97 |
|
foreach my $file (@crabOutput) |
102 |
|
my $submissionNumber = $file; |
103 |
|
$jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/; |
104 |
|
$submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/; |
105 |
< |
if ($filesToKeep{$jobNumber} != $submissionNumber) |
105 |
> |
if (!(defined $filesToKeep{$jobNumber})) |
106 |
> |
{ |
107 |
> |
print "No information found for job $jobNumber!\n"; |
108 |
> |
} |
109 |
> |
elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber) |
110 |
> |
|| (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file)) |
111 |
|
{ |
112 |
< |
system ("rm -f $ARGV[0]/$file") if $opt{"force"}; |
113 |
< |
system ("rm -i $ARGV[0]/$file") if !$opt{"force"}; |
112 |
> |
system ("rm -f $outputDir/$file") if $opt{"force"}; |
113 |
> |
system ("rm -i $outputDir/$file") if !$opt{"force"}; |
114 |
|
} |
115 |
|
} |
116 |
|
|
120 |
|
my $exeName = $0; |
121 |
|
$exeName =~ s/^.*\/([^\/]*)$/$1/; |
122 |
|
|
123 |
< |
print "Usage: $exeName [OPTION]... DIRECTORIES\n"; |
124 |
< |
print "Deletes excess EDM output caused by multiple CRAB submissions.\n"; |
123 |
> |
print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n"; |
124 |
> |
print " or: $exeName -d [OPTION]... OUTPUT_DIR\n"; |
125 |
> |
print " or: $exeName -s [OPTION]... DATASET\n"; |
126 |
> |
print "Deletes excess EDM output caused by multiple CRAB submissions using the results\n"; |
127 |
> |
print "contained in the CRAB working directory provided.\n"; |
128 |
|
print "\n"; |
129 |
|
print "Mandatory arguments to long options are mandatory for short options too.\n"; |
130 |
+ |
printf "%-29s%s\n", " -d, --dumb", "tries to use the filenames of the output instead of"; |
131 |
+ |
printf "%-29s%s\n", " ", "the results from the CRAB working directory; useful"; |
132 |
+ |
printf "%-29s%s\n", " ", "if the working directory has been lost"; |
133 |
|
printf "%-29s%s\n", " -f, --force", "never prompt (default is to prompt before any"; |
134 |
|
printf "%-29s%s\n", " ", "removal)"; |
135 |
|
printf "%-29s%s\n", " -h, --help", "print this help message"; |
136 |
+ |
printf "%-29s%s\n", " -s, --dataset", "tells the script to expect a dataset name instead"; |
137 |
+ |
printf "%-29s%s\n", " ", "of a directory"; |
138 |
|
|
139 |
|
exit; |
140 |
|
} |
141 |
+ |
|
142 |
+ |
# Scans the framework job reports ("crab_fjr_<job>.xml") found in
# "$crabDir/res" and records, for every job that finished successfully, the
# name of the output file that job produced.
#
# Arguments:
#   $crabDir     - path of the CRAB working directory; its "res" subdirectory
#                  is the one that is read
#   $filesToKeep - reference to a hash that is filled in place, mapping
#                  job number => output file name (without any path)
#
# A job gets no entry in the hash when its report cannot be read, when its
# exit status is missing or non-zero, or when no output file name can be
# extracted.  A message is printed in each of those cases.  Leaving the entry
# out is deliberate: storing an undefined or empty name would not match any
# real file of that job.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resDir = "$crabDir/res";
  my $resHandle;
  if (!opendir ($resHandle, $resDir))
    {
      print "Could not open $resDir: $!\n";
      return;
    }
  my @crabResults = readdir ($resHandle);
  closedir ($resHandle);

  for my $file (@crabResults)
    {
      next if $file !~ m/^crab_fjr_([^\.]*)\.xml$/;
      my $jobNumber = $1;

      # Read the report directly instead of running grep through the shell,
      # which would misbehave on paths containing spaces or metacharacters.
      my $report;
      if (!open ($report, "<", "$resDir/$file"))
        {
          print "Could not read $resDir/$file: $!. Skipping.\n";
          next;
        }
      my @lines = <$report>;
      close ($report);

      # The wrapper exit status is taken from the first line mentioning
      # ExitStatus; the leading greedy ".*" selects the last such attribute
      # on that line.
      my ($statusLine) = grep { m/ExitStatus/ } @lines;
      my $wrapperStatus;
      if (defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)"/)
        {
          $wrapperStatus = $1;
        }
      if (!defined $wrapperStatus)
        {
          print "Job $jobNumber has no exit status. Skipping.\n";
          next;
        }
      # Anything that is not a plain integer zero counts as a failure, so a
      # non-numeric status is never silently taken for success.
      if ($wrapperStatus !~ m/^\s*-?\d+\s*$/ || $wrapperStatus != 0)
        {
          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
          next;
        }

      # The output location is expected on the line that follows the first
      # <SurlForGrid> tag.
      my $fileName;
      for my $i (0 .. $#lines - 1)
        {
          next if $lines[$i] !~ m/<SurlForGrid>/;
          $fileName = $lines[$i + 1];
          last;
        }
      if (defined $fileName)
        {
          $fileName =~ s/^.*\///;        # drop everything up to the last slash
          $fileName =~ s/^\s+|\s+$//g;   # drop surrounding whitespace and newline
        }
      if (!defined $fileName || $fileName eq "")
        {
          print "Job $jobNumber has no output file name. Skipping.\n";
          next;
        }
      $filesToKeep->{$jobNumber} = $fileName;
    }
}