ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.7
Committed: Thu May 23 20:38:44 2013 UTC (11 years, 11 months ago) by ahart
Branch: MAIN
CVS Tags: V02-02-00, V02-01-01
Changes since 1.6: +20 -2 lines
Log Message:
When choosing among multiple datasets, the status of each dataset is now displayed in living color so hopefully it's easier to find the one you want. Also prints out the number of files before and after cleaning.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Getopt::Long;
5 use Mysql;
6 use Term::ANSIColor;
7
use warnings;

# Forward declarations of the subroutines defined at the end of the file.
sub printHelp;
sub parseCrabDir;

# Handle to the Tier 3 database.  It is only opened when a dataset lookup is
# requested (-s), so the directory-based modes keep working when the database
# cannot be reached.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
# An unrecognized option shows the help instead of silently going on to
# delete files.
GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h") or printHelp ();

# Without -d or -s two positional arguments are required: the CRAB working
# directory followed by the output directory.
my $needsTwoArgs = !$opt{"dumb"} && !$opt{"dataset"};
printHelp () if $opt{"help"} || !$ARGV[0] || ($needsTwoArgs && !$ARGV[1]);

my $crabDir = $ARGV[0];
my $outputDir = $needsTwoArgs ? $ARGV[1] : $ARGV[0];
my $dataset = $ARGV[0];

if ($opt{"dataset"})
  {
    $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

    # Translate shell-style "*" wildcards into SQL "%" and match the name
    # anywhere inside the stored dataset string.
    my $queryDataset = $dataset;
    $queryDataset =~ s/\*/%/g;
    $queryDataset = "%$queryDataset%";

    # quote () escapes the user-supplied pattern and adds the surrounding
    # quotes, so the command-line argument cannot alter the statement.
    my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
    $db->selectdb ("ntuple");
    my $results = $db->query ($query) or die "Database query failed, stopped";

    my $nRows = $results->numrows ();
    if ($nRows == 0)
      {
        print "Database entry does not exist.\n";
        exit;
      }
    elsif ($nRows == 1)
      {
        my @row = $results->fetchrow ();
        $crabDir = "$row[0]/ntuple";
        $outputDir = $row[1];
      }
    else
      {
        # Several entries match: list them with a color-coded status and let
        # the user choose one by its number.
        my %colorOf = (
          "present"    => "green",
          "submitted"  => "bold yellow",
          "created"    => "bold red",
          "cancelled"  => "bold red",
          "deprecated" => "bold red",
        );
        my %workingDir;
        my %location;
        print "Found multiple database entries matching\n";
        print "\"$dataset\":\n";
        for my $i (1 .. $nRows)
          {
            my ($rowWorkingDir, $rowLocation, $rowDataset, $rowUser, $rowCreated, $rowStatus) = $results->fetchrow ();
            $rowCreated = "" if !defined $rowCreated;
            $rowStatus = "" if !defined $rowStatus;
            # Keep only the date part of the "date time" creation stamp.
            $rowCreated =~ s/([^ ]*) [^ ]*/$1/g;
            $workingDir{"$i"} = $rowWorkingDir;
            $location{"$i"} = $rowLocation;
            # The dataset name is passed as an argument so that a "%" inside
            # it is never interpreted as a printf directive.
            printf "(%2d) %s\n", $i, $rowDataset;
            print " (";
            print color $colorOf{$rowStatus} if exists $colorOf{$rowStatus};
            print $rowStatus;
            print color "reset";
            print ") created by $rowUser on $rowCreated\n";
          }
        print "\nWhich entry would you like to use?: ";
        my $response = <STDIN>;
        $response = "" if !defined $response;    # EOF on standard input
        $response =~ s/[ \t\n]//g;
        if (!(exists $workingDir{$response}))
          {
            print "Your selection \"$response\" was not a valid option! Quitting.\n";
            exit;
          }
        $crabDir = "$workingDir{$response}/ntuple";
        $outputDir = $location{$response};
      }
  }
75
# Returns the entries of the directory given as the only argument, leaving
# out "." and "..".  Dies if the directory cannot be opened.
sub
listDirectory
{
  my $dir = shift;

  opendir (my $handle, $dir) or die "Failed to open directory $dir: $!, stopped";
  my @entries = grep { $_ ne "." && $_ ne ".." } readdir ($handle);
  closedir ($handle);

  return @entries;
}

# Splits an output file name of the form NAME_JOB_SUBMISSION_SUFFIX.root into
# (JOB, SUBMISSION).  Returns the empty list for any other file name.
sub
parseOutputName
{
  my $file = shift;

  return ($1, $2) if $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/;
  return ();
}

if (!(-d $outputDir))
  {
    print "Output directory $outputDir does not exist!\n";
    exit;
  }
my @crabOutput = listDirectory ($outputDir);

# Maps a job number to what must survive the cleaning: the exact file name
# reported by CRAB in normal mode, or the highest submission number seen among
# the output files in dumb mode.
my %filesToKeep;
if (!$opt{"dumb"})
  {
    if (!(-d $crabDir))
      {
        print "CRAB directory $crabDir does not exist!\n";
        exit;
      }
    parseCrabDir ($crabDir, \%filesToKeep);
  }
else
  {
    foreach my $file (@crabOutput)
      {
        my @fields = parseOutputName ($file);
        next if !@fields;
        my ($jobNumber, $submissionNumber) = @fields;
        if (!(defined $filesToKeep{$jobNumber})
            || $filesToKeep{$jobNumber} < $submissionNumber)
          {
            $filesToKeep{$jobNumber} = $submissionNumber;
          }
      }
  }

my $nFilesBefore = @crabOutput;
print "Before cleaning: $nFilesBefore files\n";
foreach my $file (@crabOutput)
  {
    my @fields = parseOutputName ($file);
    next if !@fields;
    my ($jobNumber, $submissionNumber) = @fields;
    if (!(defined $filesToKeep{$jobNumber}))
      {
        print "No information found for job $jobNumber!\n";
        next;
      }
    my $isExcess = $opt{"dumb"} ? $filesToKeep{$jobNumber} != $submissionNumber
                                : $filesToKeep{$jobNumber} ne $file;
    next if !$isExcess;
    # The list form of system () bypasses the shell, so spaces or shell
    # metacharacters in the path cannot change what gets removed.  Without
    # --force, rm -i asks for confirmation before each removal.
    system ("rm", ($opt{"force"} ? "-f" : "-i"), "--", "$outputDir/$file");
  }

my @remaining = listDirectory ($outputDir);
my $nFilesAfter = @remaining;
print "After cleaning: $nFilesAfter files\n";
140
# Prints the usage summary and the description of every option to standard
# output, then terminates the script.
sub
printHelp
{
  # Show only the last path component of the name the script was run as.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  my @synopsis = (
    "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n",
    " or: $exeName -d [OPTION]... OUTPUT_DIR\n",
    " or: $exeName -s [OPTION]... DATASET\n",
    "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
    "contained in the CRAB working directory provided.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  foreach my $line (@synopsis)
    {
      print $line;
    }

  # One [left column, description] pair per output line; continuation lines
  # carry a blank left column.
  my @optionTable = (
    [" -d, --dumb",    "tries to use the filenames of the output instead of"],
    [" ",              "the results from the CRAB working directory; useful"],
    [" ",              "if the working directory has been lost"],
    [" -f, --force",   "never prompt (default is to prompt before any"],
    [" ",              "removal)"],
    [" -h, --help",    "print this help message"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ",              "of a directory"],
  );
  foreach my $entry (@optionTable)
    {
      printf "%-29s%s\n", $entry->[0], $entry->[1];
    }

  exit;
}
165
# Scans the framework job reports (res/crab_fjr_<job>.xml) of a CRAB working
# directory and records, for every job that finished successfully, the name of
# the output file it produced.
#
# Arguments:
#   $crabDir     - path of the CRAB working directory (must contain "res")
#   $filesToKeep - hash reference filled as { job number => output file name }
#
# Jobs with a non-zero exit status are reported on standard output and
# skipped.  Jobs whose report cannot be read, or which record no output file,
# get no entry at all.  The reports are read directly rather than through
# "grep" in a shell, so paths containing spaces or shell metacharacters work.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resDir = "$crabDir/res";
  my $resHandle;
  if (!opendir ($resHandle, $resDir))
    {
      # Leave the hash untouched; the caller then keeps every output file.
      warn "Failed to open CRAB results directory $resDir: $!\n";
      return;
    }
  my @crabResults = readdir ($resHandle);
  closedir ($resHandle);

  for my $file (@crabResults)
    {
      next if $file !~ m/^crab_fjr_([^\.]*)\.xml$/;
      my $jobNumber = $1;

      my $report = "$resDir/$file";
      my $reportHandle;
      if (!open ($reportHandle, "<", $report))
        {
          warn "Failed to open $report: $!\n";
          next;
        }
      my @lines = <$reportHandle>;
      close ($reportHandle);

      # The first line mentioning ExitStatus carries the wrapper exit code.
      # A report without a parsable status is not treated as a failure.
      my ($statusLine) = grep { m/ExitStatus/ } @lines;
      my $wrapperStatus = 0;
      if (defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)"/)
        {
          $wrapperStatus = $1;
        }
      if ($wrapperStatus != 0)
        {
          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
          next;
        }

      # The output location is on the line following the first
      # <SurlForGrid> tag.
      my $fileName;
      for my $i (0 .. $#lines - 1)
        {
          next if $lines[$i] !~ m/<SurlForGrid>/;
          $fileName = $lines[$i + 1];
          last;
        }
      next if !defined $fileName;

      # Trim surrounding whitespace (including any carriage return) and keep
      # only the last path component.  A stray trailing character would make
      # the stored name differ from the real file name, and the caller would
      # then delete the good file.
      $fileName =~ s/^\s+//;
      $fileName =~ s/\s+$//;
      $fileName =~ s/^.*\///;
      next if $fileName eq "";

      $filesToKeep->{$jobNumber} = $fileName;
    }
}