1 |
ahart |
1.1 |
#!/usr/bin/env perl
|
2 |
|
|
|
3 |
|
|
use strict;
|
4 |
|
|
use Getopt::Long;
|
5 |
ahart |
1.4 |
use Mysql;
|
6 |
ahart |
1.7 |
use Term::ANSIColor;
|
7 |
ahart |
1.1 |
|
8 |
ahart |
1.2 |
sub printHelp;
sub parseCrabDir;

# Parse the command line before doing anything else.  GetOptions returns
# false when it meets an unknown or malformed option; in that case show the
# usage text (printHelp exits) instead of carrying on to remove or move files
# with a half-understood command line.
my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "dumb|d", "force|f", "destination|o=s", "print|p", "dataset|s", "help|h") or printHelp ();

# One positional argument is always required; a second one is required unless
# one of --dumb, --dataset or --print was given.
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$opt{"print"} && !$ARGV[1]);

# The database handle is only used for the --dataset lookup, so only connect
# in that mode.  This keeps --help, --dumb and --print usable when the
# database host cannot be reached.
our $db;
if ($opt{"dataset"})
{
  $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
}

# The first argument is interpreted according to the mode: it is the CRAB
# working directory, the output directory (--dumb) or the dataset name
# (--dataset).  All three start out equal and are refined below and by the
# --dataset lookup.
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};
|
22 |
|
|
|
23 |
|
|
# With --dataset the positional argument is a dataset name, not a directory.
# Look it up in the "ntuple" table and take the CRAB working directory and the
# output location from the matching row, asking the user to choose when more
# than one row matches.
if ($opt{"dataset"})
{
  # "*" on the command line acts as a wildcard; the pattern is also allowed
  # to match anywhere inside the stored dataset name.
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";

  # The pattern comes straight from the command line, so let the driver quote
  # it rather than pasting it between literal quotes in the statement.
  my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like "
            . $db->quote ($queryDataset)
            . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query);
  die "Database query failed, stopped" if !$results;

  my $nRows = $results->numrows ();
  die "Database entry does not exist, stopped" if $nRows == 0;

  if ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    $crabDir = "$row[0]/ntuple";
    $outputDir = $row[1];
  }
  else
  {
    # Several candidates: list them numbered from 1 and let the user pick.
    my %workingDir;
    my %location;
    print "Found multiple database entries matching\n";
    print "\"$dataset\":\n";
    for my $i (1 .. $nRows)
    {
      my @row = $results->fetchrow ();
      # Keep only the first space-separated field of the creation time.
      $row[4] =~ s/([^ ]*) [^ ]*/$1/g;
      $workingDir{$i} = $row[0];
      $location{$i} = $row[1];
      # The dataset name is passed as an argument, not as part of the format,
      # so a "%" in the name cannot be taken as a conversion.
      printf "(%2d) %s\n", $i, $row[2];
      print " (";
      # Colour the status: green = present, yellow = submitted, red = rest.
      print color "green" if $row[5] eq "present";
      print color "bold yellow" if $row[5] eq "submitted";
      print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
      print $row[5];
      print color "reset";
      print ") created by $row[3] on $row[4]\n";
    }
    print "\nWhich entry would you like to use?: ";
    my $response = <STDIN>;
    # End of input (e.g. stdin closed) is treated as an empty answer.
    $response = "" if !defined $response;
    $response =~ s/[ \t\n]//g;
    die "Your selection was not a valid option, stopped" if !(exists $workingDir{$response});
    $crabDir = "$workingDir{$response}/ntuple";
    $outputDir = $location{$response};
  }
}
|
67 |
|
|
|
68 |
ahart |
1.10 |
# Unless we are only printing the list of good files, the output directory
# must exist and its contents are needed by the steps that follow.
die "Output directory does not exist, stopped" if !$opt{"print"} && (!(-e $outputDir) || !(-d $outputDir));
my @crabOutput;
if (!$opt{"print"})
{
  # Fail loudly if the directory cannot be read: an empty listing would make
  # the rest of the script silently do nothing.
  opendir (my $outputHandle, $outputDir) or die "Failed to read output directory: $!, stopped";
  @crabOutput = readdir ($outputHandle);
  closedir ($outputHandle);
}
|
76 |
ahart |
1.1 |
# %filesToKeep maps a job number to what should be kept for that job.  In the
# normal mode parseCrabDir fills it from the CRAB working directory; in --dumb
# mode it holds the highest submission number seen among the output file
# names for each job.
my %filesToKeep;
if ($opt{"dumb"})
{
  for my $name (@crabOutput)
  {
    next if $name eq "." or $name eq "..";
    # Only names of the form <anything>_<job>_<submission>_<tag>.root count.
    next unless $name =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/;
    (my $job = $name) =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
    (my $submission = $name) =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;
    # Remember the numerically largest submission for this job.
    $filesToKeep{$job} = $submission
      if !(defined $filesToKeep{$job}) || $filesToKeep{$job} < $submission;
  }
}
else
{
  die "CRAB directory does not exist, stopped" if !(-e $crabDir) || !(-d $crabDir);
  parseCrabDir ($crabDir, \%filesToKeep);
}
|
99 |
ahart |
1.9 |
# With --destination, make sure the target exists (creating it if needed), is
# a directory, and is writable before any file is moved.
if ($opt{"destination"})
{
  my $destination = $opt{"destination"};
  if (!(-e $destination))
  {
    mkdir $destination or die "Failed to create destination directory, stopped";
  }
  die "Destination is not a directory, stopped" if !(-d $destination);

  # Writability is probed by creating and removing a scratch file.  The
  # three-argument open keeps the user-supplied path out of the mode string.
  my $testFile = "$destination/.crabCleanTest";
  open (my $testHandle, ">", $testFile) or die "Unable to write to destination directory, stopped";
  close ($testHandle);
  unlink $testFile;
}
|
107 |
ahart |
1.10 |
# Report how many entries the output directory holds before anything is
# removed or moved (not done in --print mode).
unless ($opt{"print"})
{
  my @entriesBefore;
  if (opendir (my $listing, "$outputDir"))
  {
    @entriesBefore = readdir ($listing);
    closedir ($listing);
  }
  # The listing includes "." and "..", which are not counted.
  my $nFiles = scalar (@entriesBefore) - 2;
  print "Before cleaning: $nFiles files\n";
}
|
115 |
ahart |
1.1 |
# Walk over the output files.  Without --destination the files that are NOT
# the one to keep for their job are removed; with --destination the files
# that ARE the one to keep are moved there and nothing is removed.
foreach my $file (@crabOutput)
{
  next if ($file eq "." || $file eq "..");
  # Only names of the form <anything>_<job>_<submission>_<tag>.root count.
  next if !($file =~ m/^.*_[^_]*_[^_]*_[^_]*\.root$/);
  my $jobNumber = $file;
  my $submissionNumber = $file;
  $jobNumber =~ s/^.*_([^_]*)_[^_]*_[^_]*\.root$/$1/;
  $submissionNumber =~ s/^.*_[^_]*_([^_]*)_[^_]*\.root$/$1/;

  if (!(defined $filesToKeep{$jobNumber}))
  {
    print "No information found for job $jobNumber!\n";
    next;
  }
  next if $opt{"print"};

  # In --dumb mode %filesToKeep holds a submission number, otherwise it holds
  # the name of the file to keep.
  my $isGoodFile = $opt{"dumb"} ? $filesToKeep{$jobNumber} == $submissionNumber
                                : $filesToKeep{$jobNumber} eq $file;
  # --force suppresses the confirmation prompt of rm/mv.
  my $promptFlag = $opt{"force"} ? "-f" : "-i";

  # The commands are run in list form so that no shell is involved: paths
  # with spaces or shell metacharacters are passed through untouched, and
  # "--" stops a path from being read as an option.
  if (!$isGoodFile && !$opt{"destination"})
  {
    system ("rm", $promptFlag, "--", "$outputDir/$file");
  }
  elsif ($isGoodFile && $opt{"destination"})
  {
    system ("mv", $promptFlag, "--", "$outputDir/$file", "$opt{'destination'}/$file");
  }
}
|
142 |
ahart |
1.10 |
# Final report.  Normally print the number of entries left after cleaning: in
# the destination directory when --destination was given, otherwise in the
# output directory.  In --print mode print the values stored in %filesToKeep
# instead, one per line.
if (!$opt{"print"})
{
  # Only the directory whose count is reported is opened; there is no
  # attempt to open an empty path when no destination was given.
  my $countedDir = $opt{"destination"} ? $opt{"destination"} : $outputDir;
  my @entriesAfter;
  if (opendir (my $listing, $countedDir))
  {
    @entriesAfter = readdir ($listing);
    closedir ($listing);
  }
  # The listing includes "." and "..", which are not counted.
  my $nFiles = @entriesAfter - 2;
  print "After cleaning: $nFiles files\n";
}
else
{
  foreach my $file (keys %filesToKeep)
  {
    print $filesToKeep{$file} . "\n";
  }
}
|
161 |
ahart |
1.1 |
|
162 |
|
|
# Print the usage message to standard output and exit.  Never returns.
sub
printHelp
{
  # Show the program name without any leading directory components.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n",
        " or: $exeName -d [OPTION]... OUTPUT_DIR\n",
        " or: $exeName -p [OPTION]... CRAB_DIR\n",
        " or: $exeName -s [OPTION]... DATASET\n",
        "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
        "contained in the CRAB working directory provided.\n",
        "\n",
        "Mandatory arguments to long options are mandatory for short options too.\n";

  # One [option column, description column] pair per output line; the option
  # column is padded to 29 characters.
  my @optionLines = (
    [" -d, --dumb", "tries to use the filenames of the output instead of"],
    [" ", "the results from the CRAB working directory; useful"],
    [" ", "if the working directory has been lost"],
    [" -f, --force", "never prompt (default is to prompt before any"],
    [" ", "removal)"],
    [" -h, --help", "print this help message"],
    [" -o, --destination DIR", "do not remove excess output and move the good files"],
    [" ", "to DIR"],
    [" -p, --print", "do not remove any files, just print the names of"],
    [" ", "the good files to the screen"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ", "of a directory"],
  );
  foreach my $line (@optionLines)
  {
    printf "%-29s%s\n", $line->[0], $line->[1];
  }

  exit;
}
|
191 |
ahart |
1.2 |
|
192 |
|
|
# Scan the job reports in CRAB_DIR/res and record, for every job whose report
# shows a zero exit status, the name of the output file it produced.
#
# Arguments:
#   $crabDir     - path of the CRAB working directory
#   $filesToKeep - hash reference; on return maps job number => output file
#                  name (base name only) for each successful job
#
# Reports are the files named crab_fjr_<job>.xml.  The exit status is taken
# from the first line containing 'ExitStatus'; the file name is taken from the
# first '<SurlForGrid' element, either from its Value= attribute or, when
# there is none, from the line that follows it.  Jobs with a non-zero status
# are reported on standard output and skipped.  Reports that cannot be read or
# that name no output file add no entry.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resDir = "$crabDir/res";
  my $resHandle;
  if (!opendir ($resHandle, $resDir))
  {
    warn "Unable to read $resDir: $!\n";
    return;
  }
  my @crabResults = readdir ($resHandle);
  closedir ($resHandle);

  for my $file (@crabResults)
  {
    next if !($file =~ m/^crab_fjr_[^\.]*\.xml$/);
    my $jobNumber = $file;
    $jobNumber =~ s/^crab_fjr_([^\.]*)\.xml$/$1/;

    # Read the report directly instead of running grep through a shell, so
    # directory names with spaces or shell metacharacters work.
    my $report;
    if (!open ($report, "<", "$resDir/$file"))
    {
      warn "Unable to read $resDir/$file: $!\n";
      next;
    }
    my @lines = <$report>;
    close ($report);
    chomp @lines;

    # Exit status: value of the ExitStatus="..." attribute on the first line
    # that mentions it.  A report without one is treated as status 0.
    my ($wrapperStatus) = grep { m/ExitStatus/ } @lines;
    $wrapperStatus = "" if !defined $wrapperStatus;
    $wrapperStatus =~ s/^.*ExitStatus="([^"]*)".*$/$1/;
    if ($wrapperStatus != 0)
    {
      print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
      next;
    }

    # Output file: look at the first <SurlForGrid element only.
    my $fileName;
    for my $i (0 .. $#lines)
    {
      next if !($lines[$i] =~ m/<SurlForGrid/);
      $fileName = $lines[$i] =~ m/Value=/ ? $lines[$i] : $lines[$i + 1];
      last;
    }
    next if !defined $fileName;

    # Reduce the URL to the part after the last "/"; in the attribute form
    # the closing '"/>' is dropped as well.
    if ($fileName =~ m/Value=/)
    {
      $fileName =~ s/^.*\/([^\/]*)"\/>$/$1/;
    }
    else
    {
      $fileName =~ s/^.*\/([^\/]*)$/$1/;
    }
    $filesToKeep->{$jobNumber} = $fileName;
  }
}
|