ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.9
Committed: Wed Aug 7 18:46:11 2013 UTC (11 years, 9 months ago) by ahart
Branch: MAIN
Changes since 1.8: +30 -24 lines
Log Message:
Added an option for moving the good files to a separate directory instead of removing the bad files from the original output directory.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Getopt::Long;
5 use Mysql;
6 use Term::ANSIColor;
7
8 sub printHelp;
9 sub parseCrabDir;
10
# Handle to the Tier 3 ntuple database.  The connection is opened only when a
# dataset lookup (-s) is requested, so that --help and the directory-based
# modes do not depend on the database server being reachable.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
# GetOptions returns false when it meets an option it cannot parse; treat that
# as a usage error instead of carrying on with whatever was understood.
GetOptions (\%opt, "dumb|d", "force|f", "destination|o=s", "dataset|s", "help|h") or printHelp ();

# One positional argument is enough with -d or -s; otherwise both CRAB_DIR and
# OUTPUT_DIR must be given.
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);

# The first argument is interpreted according to the mode: CRAB_DIR by
# default, OUTPUT_DIR with -d, DATASET with -s.
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};

if ($opt{"dataset"})
  {
    $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";
    $db->selectdb ("ntuple");

    # "*" on the command line is a wildcard; the name is also allowed to match
    # anywhere inside the stored dataset name.  The pattern is passed through
    # the driver's quote method so that quotes in the user's input cannot
    # change the statement.
    my $queryDataset = $dataset;
    $queryDataset =~ s/\*/%/g;
    my $quotedPattern = $db->quote ("%$queryDataset%");
    my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like $quotedPattern order by creationTime";
    my $results = $db->query ($query) or die "Failed to query the database, stopped";

    my $nMatches = $results->numrows ();
    die "Database entry does not exist, stopped" if $nMatches == 0;
    if ($nMatches == 1)
      {
        my @row = $results->fetchrow ();
        $crabDir = "$row[0]/ntuple";
        $outputDir = $row[1];
      }
    else
      {
        # Several entries match: list them and let the user pick one by index.
        my %workingDir;
        my %location;
        print "Found multiple database entries matching\n";
        print "\"$dataset\":\n";
        foreach my $i (1 .. $nMatches)
          {
            my @row = $results->fetchrow ();
            # Keep only the part of the creation time before the first space.
            $row[4] =~ s/([^ ]*) [^ ]*/$1/g;
            $workingDir{"$i"} = $row[0];
            $location{"$i"} = $row[1];
            # The dataset name is passed as an argument, not as part of the
            # format, so a "%" in it is printed literally.
            printf "(%2d) %s\n", $i, $row[2];
            print " (";
            print color "green" if $row[5] eq "present";
            print color "bold yellow" if $row[5] eq "submitted";
            print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
            print $row[5];
            print color "reset";
            print ") created by $row[3] on $row[4]\n";
          }
        print "\nWhich entry would you like to use?: ";
        my $response = <STDIN>;
        # End of input without an answer counts as an invalid selection.
        $response = "" if !defined $response;
        $response =~ s/[ \t\n]//g;
        die "Your selection was not a valid option, stopped" if !(exists $workingDir{$response});
        $crabDir = "$workingDir{$response}/ntuple";
        $outputDir = $location{$response};
      }
  }
67
die "Output directory does not exist, stopped" if !(-e $outputDir) || !(-d $outputDir);

# Maps a job number to the output that should survive the cleaning: the output
# file name as filled in by parseCrabDir, or with -d the highest submission
# number seen for that job.
my %filesToKeep;
if (!$opt{"dumb"})
  {
    die "CRAB directory does not exist, stopped" if !(-e $crabDir) || !(-d $crabDir);
    parseCrabDir ($crabDir, \%filesToKeep);
  }
else
  {
    # Fail loudly if the directory cannot be listed; an unreadable directory
    # must not be mistaken for an empty one.
    opendir (my $outputHandle, $outputDir) or die "Failed to read output directory ($!), stopped";
    my @outputFiles = readdir ($outputHandle);
    closedir ($outputHandle);

    foreach my $file (@outputFiles)
      {
        # Output files are named <name>_<job>_<submission>_<suffix>.root;
        # anything else (including "." and "..") is ignored.
        next if $file !~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/;
        my ($jobNumber, $submissionNumber) = ($1, $2);
        if (!(defined $filesToKeep{$jobNumber})
            || $filesToKeep{$jobNumber} < $submissionNumber)
          {
            $filesToKeep{$jobNumber} = $submissionNumber;
          }
      }
  }
# With -o, make sure the destination exists, is a directory and is writable
# before any file is touched.
if ($opt{"destination"})
  {
    my $destination = $opt{"destination"};
    if (!(-e $destination))
      {
        mkdir ($destination) or die "Failed to create destination directory, stopped";
      }
    die "Destination is not a directory, stopped" if !(-d $destination);

    # Writability is probed by creating and removing a scratch file.  The
    # three-argument form of open keeps the user-supplied path from being
    # interpreted as part of the open mode.
    my $testFile = "$destination/.crabCleanTest";
    open (my $testHandle, ">", $testFile) or die "Unable to write to destination directory, stopped";
    close ($testHandle);
    unlink ($testFile);
  }
# List the output directory and either remove the excess files (default) or
# move the good files to the destination directory (-o).
opendir (my $cleaningHandle, $outputDir) or die "Failed to read output directory ($!), stopped";
my @filesBeforeCleaning = readdir ($cleaningHandle);
closedir ($cleaningHandle);

# The listing includes "." and "..", which are not counted.
my $nFilesBefore = @filesBeforeCleaning - 2;
print "Before cleaning: $nFilesBefore files\n";

# -f suppresses the confirmation prompt of rm and mv; the default is to ask.
my $promptFlag = $opt{"force"} ? "-f" : "-i";
foreach my $file (@filesBeforeCleaning)
  {
    # Only files named <name>_<job>_<submission>_<suffix>.root are considered.
    next if $file !~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/;
    my ($jobNumber, $submissionNumber) = ($1, $2);
    if (!(defined $filesToKeep{$jobNumber}))
      {
        print "No information found for job $jobNumber!\n";
        next;
      }

    # With -d the table holds the submission number to keep, otherwise the
    # full name of the file to keep.
    my $isGoodFile = $opt{"dumb"} ? $filesToKeep{$jobNumber} == $submissionNumber
                                  : $filesToKeep{$jobNumber} eq $file;

    # The commands are run in list form, without a shell, so that spaces or
    # shell metacharacters in a path cannot alter the command; "--" keeps a
    # path starting with "-" from being read as an option.
    if (!$isGoodFile && !$opt{"destination"})
      {
        system ("rm", $promptFlag, "--", "$outputDir/$file");
      }
    elsif ($isGoodFile && $opt{"destination"})
      {
        system ("mv", $promptFlag, "--", "$outputDir/$file", "$opt{'destination'}/$file");
      }
  }
# Report how many entries are left: in the destination directory when the good
# files were moved there (-o), otherwise in the cleaned output directory.
# Only the directory that is actually reported is opened.
my $reportedDir = $opt{"destination"} ? $opt{"destination"} : $outputDir;
opendir (my $reportHandle, $reportedDir) or die "Failed to read $reportedDir ($!), stopped";
my @filesAfterCleaning = readdir ($reportHandle);
closedir ($reportHandle);

# The listing includes "." and "..", which are not counted.
my $nFilesAfter = @filesAfterCleaning - 2;
print "After cleaning: $nFilesAfter files\n";
144
# Prints the usage summary and the description of every option to standard
# output, then terminates the program.  Takes no arguments and never returns.
sub printHelp
{
  # Show the program under the name it was invoked with, without any leading
  # directory components.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  print "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n",
        " or: $exeName -d [OPTION]... OUTPUT_DIR\n",
        " or: $exeName -s [OPTION]... DATASET\n",
        "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
        "contained in the CRAB working directory provided.\n",
        "\n",
        "Mandatory arguments to long options are mandatory for short options too.\n";

  # One row per output line: the option column (padded to a fixed width when
  # printed) followed by the description text.
  my @optionRows = (
    [" -d, --dumb", "tries to use the filenames of the output instead of"],
    [" ", "the results from the CRAB working directory; useful"],
    [" ", "if the working directory has been lost"],
    [" -f, --force", "never prompt (default is to prompt before any"],
    [" ", "removal)"],
    [" -h, --help", "print this help message"],
    [" -o, --destination DIR", "do not remove excess output and move the good files"],
    [" ", "to DIR"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ", "of a directory"],
  );
  foreach my $row (@optionRows)
    {
      printf "%-29s%s\n", $row->[0], $row->[1];
    }

  exit;
}
171
# Records, for every successfully finished job found in a CRAB working
# directory, the name of the output file that job produced.
#
# Arguments:
#   $crabDir      path of the CRAB working directory; the job reports are read
#                 from "$crabDir/res/crab_fjr_<job>.xml"
#   $filesToKeep  reference to a hash that receives one entry per successful
#                 job: job number => last path component of the location given
#                 by the report's first SurlForGrid element
#
# A job is skipped (with a message on standard output) when its report cannot
# be read, contains no numeric ExitStatus, or has a nonzero ExitStatus.  A job
# whose report names no output location gets no entry.  Dies if the "res"
# directory cannot be listed.
#
# The reports are read directly instead of through shell commands, so paths
# containing spaces or shell metacharacters are handled correctly.
sub parseCrabDir
{
  my ($crabDir, $filesToKeep) = @_;

  opendir (my $resultsHandle, "$crabDir/res") or die "Failed to read CRAB results directory ($!), stopped";
  my @crabResults = readdir ($resultsHandle);
  closedir ($resultsHandle);

  foreach my $file (@crabResults)
    {
      next if $file !~ m/^crab_fjr_([^\.]*)\.xml$/;
      my $jobNumber = $1;

      my $reportHandle;
      if (!open ($reportHandle, "<", "$crabDir/res/$file"))
        {
          print "Job $jobNumber has an unreadable report ($!). Skipping.\n";
          next;
        }
      my @lines = <$reportHandle>;
      close ($reportHandle);

      # The status is taken from the first line that mentions ExitStatus.  If
      # there is none, or its value is not a number, the job cannot be assumed
      # to have succeeded.
      my ($statusLine) = grep { m/ExitStatus/ } @lines;
      my $wrapperStatus;
      if (defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)".*$/)
        {
          $wrapperStatus = $1;
        }
      if (!defined $wrapperStatus || $wrapperStatus !~ m/^-?\d+$/)
        {
          print "Job $jobNumber has no valid exit status. Skipping.\n";
          next;
        }
      if ($wrapperStatus != 0)
        {
          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
          next;
        }

      # The output location is either the Value attribute on the first
      # <SurlForGrid line or, when that line has no Value attribute, the line
      # that follows it.
      my ($surlIndex) = grep { $lines[$_] =~ m/<SurlForGrid/ } 0 .. $#lines;
      next if !defined $surlIndex;
      my $fileName = $lines[$surlIndex] =~ m/Value=/ ? $lines[$surlIndex] : $lines[$surlIndex + 1];
      next if !defined $fileName;

      # Drop the line ending and any other trailing whitespace first; left in
      # place it would end up in the file name and the name would never match
      # the real output file.
      $fileName =~ s/\s+\z//;
      if ($fileName =~ m/Value=/)
        {
          $fileName =~ s/^.*\/([^\/]*)"\/>\z/$1/;
        }
      else
        {
          $fileName =~ s/^.*\/([^\/]*)\z/$1/;
        }
      $filesToKeep->{$jobNumber} = $fileName;
    }
}