ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.10
Committed: Wed Aug 7 23:55:28 2013 UTC (11 years, 8 months ago) by ahart
Branch: MAIN
CVS Tags: HEAD
Changes since 1.9: +42 -22 lines
Log Message:
Added an option for printing the list of good files.

File Contents

# User Rev Content
#!/usr/bin/env perl

# crabclean -- tidies the output area of a CRAB task.
#
# Repeated submissions of a CRAB job leave several output files per job.  This
# script works out which file of each job is the good one, either from the job
# reports in the CRAB working directory or (with -d) from the job and
# submission numbers embedded in the file names, and then
#   * deletes the other files (default),
#   * moves the good files to another directory (-o DIR), or
#   * only prints the entries recorded as good (-p).
# With -s the directories are looked up in the ntuple database instead of
# being given on the command line.

use strict;
use warnings;
use Getopt::Long;
use Mysql;
use Term::ANSIColor;

sub printHelp;
sub parseCrabDir;

# Handle for the ntuple database.  It stays undefined unless -s is given, so
# that the other modes do not depend on the database server being reachable.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "dumb|d", "force|f", "destination|o=s", "print|p", "dataset|s", "help|h");

# A second positional argument is only required when neither -d, -s nor -p is
# given, i.e. when both the CRAB directory and the output directory are needed.
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$opt{"print"} && !$ARGV[1]);
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};

# Returns every entry of the given directory, "." and ".." included.
my $listDirectory = sub
{
  my $directory = shift;
  opendir (my $handle, $directory) or die "Failed to read directory $directory: $!, stopped";
  my @entries = readdir ($handle);
  closedir ($handle);
  return @entries;
};

# Splits an output file name of the form NAME_JOB_SUBMISSION_SUFFIX.root into
# (job number, submission number).  Returns an empty list for any other name,
# which also covers "." and "..".
my $parseFileName = sub
{
  my $file = shift;
  return $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/;
};

if ($opt{"dataset"})
{
  $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

  # Shell-style "*" becomes the SQL wildcard and the name is matched as a
  # substring.  The value is quoted by the driver rather than pasted into the
  # statement verbatim.
  (my $queryDataset = $dataset) =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Database query failed, stopped";

  my $nRows = $results->numrows ();
  die "Database entry does not exist, stopped" if $nRows == 0;
  if ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    $crabDir = "$row[0]/ntuple";
    $outputDir = $row[1];
  }
  else
  {
    # Several matches: show a numbered menu and let the user choose.
    my %statusColor = ("present" => "green",
                       "submitted" => "bold yellow",
                       "created" => "bold red",
                       "cancelled" => "bold red",
                       "deprecated" => "bold red");
    my %workingDir;
    my %location;
    print "Found multiple database entries matching\n";
    print "\"$dataset\":\n";
    for my $i (1 .. $nRows)
    {
      my @row = $results->fetchrow ();
      $row[4] =~ s/([^ ]*) [^ ]*/$1/g;  # keep the date, drop the time of day
      $workingDir{"$i"} = $row[0];
      $location{"$i"} = $row[1];
      # The dataset name is passed as an argument so that a "%" in it cannot
      # be mistaken for a format directive.
      printf "(%2d) %s\n", $i, $row[2];
      my $status = defined $row[5] ? $row[5] : "";
      print " (";
      if (exists $statusColor{$status})
      {
        my $colorCode = color ($statusColor{$status});
        print $colorCode;
      }
      print $status;
      my $resetCode = color ("reset");
      print $resetCode;
      print ") created by $row[3] on $row[4]\n";
    }
    print "\nWhich entry would you like to use?: ";
    my $response = <STDIN>;
    $response = "" if !defined $response;  # end of input counts as no choice
    $response =~ s/[ \t\n]//g;
    die "Your selection was not a valid option, stopped" if !(exists $workingDir{$response});
    $crabDir = "$workingDir{$response}/ntuple";
    $outputDir = $location{$response};
  }
}

# The output directory is only read when files may be removed or moved.
my @crabOutput;
if (!$opt{"print"})
{
  die "Output directory does not exist, stopped" if !(defined $outputDir && -d $outputDir);
  @crabOutput = $listDirectory->($outputDir);
}

# Maps a job number to what identifies its good file: the file name reported
# by CRAB, or in -d mode the highest submission number seen for that job.
my %filesToKeep;
if (!$opt{"dumb"})
{
  die "CRAB directory does not exist, stopped" if !(-d $crabDir);
  parseCrabDir ($crabDir, \%filesToKeep);
}
else
{
  foreach my $file (@crabOutput)
  {
    my ($jobNumber, $submissionNumber) = $parseFileName->($file);
    next if !defined $jobNumber;
    if (!(defined $filesToKeep{$jobNumber})
        || $filesToKeep{$jobNumber} < $submissionNumber)
    {
      $filesToKeep{$jobNumber} = $submissionNumber;
    }
  }
}

if ($opt{"destination"})
{
  if (!(-e $opt{"destination"}))
  {
    mkdir ($opt{"destination"}) or die "Failed to create destination directory, stopped";
  }
  die "Destination is not a directory, stopped" if !(-d $opt{"destination"});

  # Make sure the destination is writable before anything is moved.
  my $testFile = "$opt{'destination'}/.crabCleanTest";
  open (my $testHandle, ">", $testFile) or die "Unable to write to destination directory, stopped";
  close ($testHandle);
  unlink ($testFile);
}

if ($opt{"print"})
{
  foreach my $jobNumber (keys %filesToKeep)
  {
    next if !defined $filesToKeep{$jobNumber};
    print $filesToKeep{$jobNumber} . "\n";
  }
}
else
{
  my $nFiles = @crabOutput - 2;  # do not count "." and ".."
  print "Before cleaning: $nFiles files\n";

  # rm and mv are run without a shell, so unusual characters in a path cannot
  # be misread; "--" stops a leading "-" from being taken as an option.
  my $promptFlag = $opt{"force"} ? "-f" : "-i";
  foreach my $file (@crabOutput)
  {
    my ($jobNumber, $submissionNumber) = $parseFileName->($file);
    next if !defined $jobNumber;
    if (!(defined $filesToKeep{$jobNumber}))
    {
      print "No information found for job $jobNumber!\n";
      next;
    }
    my $isGood = $opt{"dumb"} ? $filesToKeep{$jobNumber} == $submissionNumber
                              : $filesToKeep{$jobNumber} eq $file;
    if ($opt{"destination"})
    {
      # With a destination nothing is deleted; good files are moved away.
      system ("mv", $promptFlag, "--", "$outputDir/$file", "$opt{'destination'}/$file") if $isGood;
    }
    elsif (!$isGood)
    {
      system ("rm", $promptFlag, "--", "$outputDir/$file");
    }
  }

  # Report the size of whichever directory now holds the good files.
  my $countedDir = $opt{"destination"} ? $opt{"destination"} : $outputDir;
  my @remaining = $listDirectory->($countedDir);
  $nFiles = @remaining - 2;
  print "After cleaning: $nFiles files\n";
}

# Prints the usage summary and the description of every option to standard
# output, then exits the program.
sub
printHelp
{
  # Reduce the invocation path to the bare script name.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  my @synopsis = (
    "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n",
    " or: $exeName -d [OPTION]... OUTPUT_DIR\n",
    " or: $exeName -p [OPTION]... CRAB_DIR\n",
    " or: $exeName -s [OPTION]... DATASET\n",
    "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
    "contained in the CRAB working directory provided.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  print join ("", @synopsis);

  # One [left column, right column] pair per output line; continuation lines
  # of a description have a blank left column.
  my @optionRows = (
    [" -d, --dumb", "tries to use the filenames of the output instead of"],
    [" ", "the results from the CRAB working directory; useful"],
    [" ", "if the working directory has been lost"],
    [" -f, --force", "never prompt (default is to prompt before any"],
    [" ", "removal)"],
    [" -h, --help", "print this help message"],
    [" -o, --destination DIR", "do not remove excess output and move the good files"],
    [" ", "to DIR"],
    [" -p, --print", "do not remove any files, just print the names of"],
    [" ", "the good files to the screen"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ", "of a directory"],
  );
  foreach my $row (@optionRows)
  {
    my ($option, $description) = @$row;
    printf "%-29s%s\n", $option, $description;
  }

  exit;
}

# Determines the good output file of every successful job from the job reports
# (res/crab_fjr_<job>.xml) found in a CRAB working directory.
#
# Arguments:
#   $crabDir      path of the CRAB working directory
#   $filesToKeep  reference to a hash that receives job number => file name
#
# A job is recorded only when its report states an exit status of zero and
# names an output file.  Jobs whose report is unreadable, has no usable exit
# status or names no file are left out of the hash, so that the caller never
# treats a half-parsed report as authoritative.  The reports are read directly
# rather than through external commands, so paths containing whitespace or
# shell metacharacters are handled correctly.
#
# Dies if the res directory cannot be read.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resultsDir = "$crabDir/res";
  opendir (my $resultsHandle, $resultsDir) or die "Failed to read CRAB results directory $resultsDir: $!, stopped";
  my @crabResults = readdir ($resultsHandle);
  closedir ($resultsHandle);

  for my $file (@crabResults)
  {
    next if !($file =~ m/^crab_fjr_([^\.]*)\.xml$/);
    my $jobNumber = $1;

    my @report;
    if (open (my $reportHandle, "<", "$resultsDir/$file"))
    {
      @report = <$reportHandle>;
      close ($reportHandle);
    }
    else
    {
      print "Unable to read the report of job $jobNumber ($!). Skipping.\n";
      next;
    }

    # The first line mentioning ExitStatus decides whether the job succeeded;
    # if it carries several ExitStatus attributes the last one is used.
    my ($statusLine) = grep { m/ExitStatus/ } @report;
    my $wrapperStatus;
    $wrapperStatus = $1 if defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)"/;
    if (!defined $wrapperStatus)
    {
      print "Job $jobNumber has no exit status. Skipping.\n";
      next;
    }
    if ($wrapperStatus !~ m/^\s*[+-]?\d+\s*$/ || $wrapperStatus != 0)
    {
      print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
      next;
    }

    # The storage URL is either a Value="..." attribute (on the SurlForGrid
    # line itself or on the line after it) or bare text on the following line.
    # Only the first SurlForGrid element is considered, and only the last path
    # component of the URL is kept.
    my $fileName;
    for my $i (0 .. $#report)
    {
      next if !($report[$i] =~ m/<SurlForGrid/);
      my $location = $report[$i] =~ m/Value=/ ? $report[$i] : $report[$i + 1];
      last if !defined $location;
      if ($location =~ m/Value=/)
      {
        ($fileName) = $location =~ m/\/([^\/"]+)"\/>\s*$/;
      }
      else
      {
        ($fileName) = $location =~ m/\/([^\/\s][^\/]*?)\s*$/;
      }
      last;
    }
    next if !defined $fileName;

    $filesToKeep->{$jobNumber} = $fileName;
  }
}
222     }