3 |
|
use strict; |
4 |
|
use Getopt::Long; |
5 |
|
use Mysql; |
6 |
+ |
use Term::ANSIColor; |
7 |
|
|
8 |
|
sub printHelp; |
9 |
|
sub parseCrabDir; |
12 |
|
|
13 |
|
my %opt; |
14 |
|
Getopt::Long::Configure ("bundling"); |
15 |
< |
GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h"); |
15 |
> |
GetOptions (\%opt, "dumb|d", "force|f", "destination|o=s", "print|p", "dataset|s", "help|h"); |
16 |
|
|
17 |
< |
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]); |
17 |
> |
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$opt{"print"} && !$ARGV[1]); |
18 |
|
my $crabDir = $ARGV[0]; |
19 |
|
my $outputDir = $ARGV[0]; |
20 |
|
my $dataset = $ARGV[0]; |
22 |
|
|
23 |
|
if ($opt{"dataset"}) |
24 |
|
{ |
25 |
< |
my $query = "select workingDirectory,location,dataset,user,creationTime from ntuple where dataset like '$dataset' order by creationTime"; |
25 |
> |
my $queryDataset = $dataset; |
26 |
> |
$queryDataset =~ s/\*/%/g; |
27 |
> |
$queryDataset =~ s/(.*)/%$1%/g; |
28 |
> |
my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like '$queryDataset' order by creationTime"; |
29 |
|
$db->selectdb ("ntuple"); |
30 |
|
my $results = $db->query ($query); |
31 |
|
if ($results->numrows () == 1) |
34 |
|
$crabDir = "$row[0]/ntuple"; |
35 |
|
$outputDir = $row[1]; |
36 |
|
} |
37 |
< |
if ($results->numrows () == 0) |
37 |
> |
die "Database entry does not exist, stopped" if $results->numrows () == 0; |
38 |
> |
if ($results->numrows () > 1) |
39 |
|
{ |
40 |
< |
print "Database entry does not exist.\n"; |
41 |
< |
exit; |
42 |
< |
} |
43 |
< |
my %workingDir; |
44 |
< |
my %location; |
45 |
< |
print "Found multiple database entries matching\n"; |
46 |
< |
print "\"$dataset\":\n"; |
47 |
< |
for (my $i = 1; $i <= $results->numrows (); $i++) |
48 |
< |
{ |
49 |
< |
my @row = $results->fetchrow (); |
50 |
< |
$workingDir{"$i"} = $row[0]; |
51 |
< |
$location{"$i"} = $row[1]; |
52 |
< |
printf "(%2d) $row[2]\n", $i; |
53 |
< |
print " created by $row[3] on $row[4]\n"; |
54 |
< |
} |
55 |
< |
print "\nWhich entry would you like to use?: "; |
56 |
< |
my $response = <STDIN>; |
57 |
< |
$response =~ s/[ \t\n]//g; |
58 |
< |
if (!(exists $workingDir{$response})) |
59 |
< |
{ |
60 |
< |
print "Your selection \"$response\" was not a valid option! Quitting.\n"; |
61 |
< |
exit; |
40 |
> |
my %workingDir; |
41 |
> |
my %location; |
42 |
> |
print "Found multiple database entries matching\n"; |
43 |
> |
print "\"$dataset\":\n"; |
44 |
> |
for (my $i = 1; $i <= $results->numrows (); $i++) |
45 |
> |
{ |
46 |
> |
my @row = $results->fetchrow (); |
47 |
> |
$row[4] =~ s/([^ ]*) [^ ]*/$1/g; |
48 |
> |
$workingDir{"$i"} = $row[0]; |
49 |
> |
$location{"$i"} = $row[1]; |
50 |
> |
printf "(%2d) $row[2]\n", $i; |
51 |
> |
print " ("; |
52 |
> |
print color "green" if $row[5] eq "present"; |
53 |
> |
print color "bold yellow" if $row[5] eq "submitted"; |
54 |
> |
print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated"; |
55 |
> |
print $row[5]; |
56 |
> |
print color "reset"; |
57 |
> |
print ") created by $row[3] on $row[4]\n"; |
58 |
> |
} |
59 |
> |
print "\nWhich entry would you like to use?: "; |
60 |
> |
my $response = <STDIN>; |
61 |
> |
$response =~ s/[ \t\n]//g; |
62 |
> |
die "Your selection was not a valid option, stopped" if !(exists $workingDir{$response}); |
63 |
> |
$crabDir = "$workingDir{$response}/ntuple"; |
64 |
> |
$outputDir = $location{$response}; |
65 |
|
} |
58 |
– |
$crabDir = "$workingDir{$response}/ntuple"; |
59 |
– |
$outputDir = $location{$response}; |
66 |
|
} |
67 |
|
|
68 |
< |
if (!(-e $outputDir) || !(-d $outputDir)) |
68 |
> |
die "Output directory does not exist, stopped" if !$opt{"print"} && (!(-e $outputDir) || !(-d $outputDir)); |
69 |
> |
my @crabOutput; |
70 |
> |
if (!$opt{"print"}) |
71 |
|
{ |
72 |
< |
print "Output directory $outputDir does not exist!\n"; |
73 |
< |
exit; |
72 |
> |
opendir (CRAB_OUTPUT, "$outputDir"); |
73 |
> |
@crabOutput = readdir (CRAB_OUTPUT); |
74 |
> |
closedir (CRAB_OUTPUT); |
75 |
|
} |
67 |
– |
opendir (CRAB_OUTPUT, "$outputDir"); |
68 |
– |
my @crabOutput = readdir (CRAB_OUTPUT); |
69 |
– |
closedir (CRAB_OUTPUT); |
76 |
|
my %filesToKeep; |
77 |
|
if (!$opt{"dumb"}) |
78 |
|
{ |
79 |
< |
if (!(-e $crabDir) || !(-d $crabDir)) |
74 |
< |
{ |
75 |
< |
print "CRAB directory $crabDir does not exist!\n"; |
76 |
< |
exit; |
77 |
< |
} |
79 |
> |
die "CRAB directory does not exist, stopped" if !(-e $crabDir) || !(-d $crabDir); |
80 |
|
parseCrabDir ($crabDir, \%filesToKeep); |
81 |
|
} |
82 |
|
else |
96 |
|
} |
97 |
|
} |
98 |
|
} |
99 |
+ |
if ($opt{"destination"}) |
100 |
+ |
{ |
101 |
+ |
mkdir $opt{"destination"} or die "Failed to create destination directory, stopped" if !(-e $opt{"destination"}); |
102 |
+ |
die "Destination is not a directory, stopped" if !(-d $opt{"destination"}); |
103 |
+ |
open (TEST_FILE, ">$opt{'destination'}/.crabCleanTest") or die "Unable to write to destination directory, stopped"; |
104 |
+ |
close (TEST_FILE); |
105 |
+ |
unlink "$opt{'destination'}/.crabCleanTest"; |
106 |
+ |
} |
107 |
+ |
if (!$opt{"print"}) |
108 |
+ |
{ |
109 |
+ |
opendir (CRAB_OUTPUT, "$outputDir"); |
110 |
+ |
my @crabOutput = readdir (CRAB_OUTPUT); |
111 |
+ |
closedir (CRAB_OUTPUT); |
112 |
+ |
my $nFiles = @crabOutput - 2; |
113 |
+ |
print "Before cleaning: $nFiles files\n"; |
114 |
+ |
} |
115 |
|
foreach my $file (@crabOutput) |
116 |
|
{ |
117 |
|
next if ($file eq "." || $file eq ".."); |
124 |
|
{ |
125 |
|
print "No information found for job $jobNumber!\n"; |
126 |
|
} |
127 |
< |
elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber) |
127 |
> |
elsif ((($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber) |
128 |
|
|| (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file)) |
129 |
+ |
&& !$opt{"destination"} && !$opt{"print"}) |
130 |
|
{ |
131 |
|
system ("rm -f $outputDir/$file") if $opt{"force"}; |
132 |
|
system ("rm -i $outputDir/$file") if !$opt{"force"}; |
133 |
|
} |
134 |
+ |
elsif ((($opt{"dumb"} && $filesToKeep{$jobNumber} == $submissionNumber) |
135 |
+ |
|| (!$opt{"dumb"} && $filesToKeep{$jobNumber} eq $file)) |
136 |
+ |
&& $opt{"destination"} && !$opt{"print"}) |
137 |
+ |
{ |
138 |
+ |
system ("mv -f $outputDir/$file $opt{'destination'}/$file") if $opt{"force"}; |
139 |
+ |
system ("mv -i $outputDir/$file $opt{'destination'}/$file") if !$opt{"force"}; |
140 |
+ |
} |
141 |
+ |
} |
142 |
+ |
if (!$opt{"print"}) |
143 |
+ |
{ |
144 |
+ |
opendir (CRAB_OUTPUT, "$outputDir"); |
145 |
+ |
my @crabOutput = readdir (CRAB_OUTPUT); |
146 |
+ |
closedir (CRAB_OUTPUT); |
147 |
+ |
opendir (CRAB_OUTPUT, "$opt{'destination'}"); |
148 |
+ |
my @crabOutputInDestination = readdir (CRAB_OUTPUT); |
149 |
+ |
closedir (CRAB_OUTPUT); |
150 |
+ |
my $nFiles = @crabOutput - 2; |
151 |
+ |
$nFiles = @crabOutputInDestination - 2 if $opt{"destination"}; |
152 |
+ |
print "After cleaning: $nFiles files\n"; |
153 |
+ |
} |
154 |
+ |
else |
155 |
+ |
{ |
156 |
+ |
foreach my $file (keys %filesToKeep) |
157 |
+ |
{ |
158 |
+ |
print $filesToKeep{$file} . "\n"; |
159 |
+ |
} |
160 |
|
} |
161 |
|
|
162 |
|
sub |
167 |
|
|
168 |
|
print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n"; |
169 |
|
print " or: $exeName -d [OPTION]... OUTPUT_DIR\n"; |
170 |
+ |
print " or: $exeName -p [OPTION]... CRAB_DIR\n"; |
171 |
|
print " or: $exeName -s [OPTION]... DATASET\n"; |
172 |
|
print "Deletes excess EDM output caused by multiple CRAB submissions using the results\n"; |
173 |
|
print "contained in the CRAB working directory provided.\n"; |
179 |
|
printf "%-29s%s\n", " -f, --force", "never prompt (default is to prompt before any"; |
180 |
|
printf "%-29s%s\n", " ", "removal)"; |
181 |
|
printf "%-29s%s\n", " -h, --help", "print this help message"; |
182 |
+ |
printf "%-29s%s\n", " -o, --destination DIR", "do not remove excess output and move the good files"; |
183 |
+ |
printf "%-29s%s\n", " ", "to DIR"; |
184 |
+ |
printf "%-29s%s\n", " -p, --print", "do not remove any files, just print the names of"; |
185 |
+ |
printf "%-29s%s\n", " ", "the good files to the screen"; |
186 |
|
printf "%-29s%s\n", " -s, --dataset", "tells the script to expect a dataset name instead"; |
187 |
|
printf "%-29s%s\n", " ", "of a directory"; |
188 |
|
|
212 |
|
print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n"; |
213 |
|
next; |
214 |
|
} |
215 |
< |
my @surlForGrid = `grep -A 1 '<SurlForGrid>' $crabDir/res/$file`; |
215 |
> |
my @surlForGrid = `grep -A 1 '<SurlForGrid' $crabDir/res/$file`; |
216 |
|
my $fileName = $surlForGrid[1]; |
217 |
< |
$fileName =~ s/^.*\/([^\/]*)\n/$1/; |
217 |
> |
$fileName = $surlForGrid[0] if $surlForGrid[0] =~ m/Value=/; |
218 |
> |
$fileName =~ s/^.*\/([^\/]*)\n/$1/ if !($fileName =~ m/Value=/); |
219 |
> |
$fileName =~ s/^.*\/([^\/]*)"\/>\n/$1/ if $fileName =~ m/Value=/; |
220 |
|
$filesToKeep->{$jobNumber} = $fileName; |
221 |
|
} |
222 |
|
} |