ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.7
Committed: Thu May 23 20:38:44 2013 UTC (11 years, 11 months ago) by ahart
Branch: MAIN
CVS Tags: V02-02-00, V02-01-01
Changes since 1.6: +20 -2 lines
Log Message:
When choosing among multiple datasets, the status of each dataset is now displayed in living color so hopefully it's easier to find the one you want. Also prints out the number of files before and after cleaning.

File Contents

# User Rev Content
1 ahart 1.1 #!/usr/bin/env perl
2    
3     use strict;
4     use Getopt::Long;
5 ahart 1.4 use Mysql;
6 ahart 1.7 use Term::ANSIColor;
7 ahart 1.1
sub printHelp;
sub parseCrabDir;
sub parseOutputFileName;
sub listDirectory;

# Handle to the Tier 3 bookkeeping database.  The connection is opened only
# when a dataset lookup (-s) is requested, so that the other modes (and -h)
# keep working when the database server cannot be reached.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
# An unrecognized option shows the usage text instead of silently proceeding
# to delete files with options the user did not intend.
GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h") or printHelp ();

printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};

# Dataset mode: translate the dataset name into a CRAB working directory and
# an output location using the ntuple table.
if ($opt{"dataset"})
  {
    $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

    # Shell-style '*' wildcards become SQL '%' wildcards, and the pattern is
    # allowed to match anywhere inside the stored dataset name.
    my $queryDataset = $dataset;
    $queryDataset =~ s/\*/%/g;
    $queryDataset = "%$queryDataset%";
    # quote () escapes the user-supplied pattern and adds the surrounding
    # quotes, so a name containing a quote character cannot alter the query.
    my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
    $db->selectdb ("ntuple");
    my $results = $db->query ($query) or die "Database query failed, stopped";
    my $nRows = $results->numrows ();

    if ($nRows == 0)
      {
        print "Database entry does not exist.\n";
        exit;
      }
    elsif ($nRows == 1)
      {
        my @row = $results->fetchrow ();
        $crabDir = "$row[0]/ntuple";
        $outputDir = $row[1];
      }
    else
      {
        # Several matches: list them, colored by status, and let the user
        # pick one by its index.
        my %workingDir;
        my %location;
        print "Found multiple database entries matching\n";
        print "\"$dataset\":\n";
        for my $i (1 .. $nRows)
          {
            my @row = $results->fetchrow ();
            # Keep only the first space-separated field of the creation time.
            $row[4] =~ s/([^ ]*) [^ ]*/$1/g;
            $workingDir{$i} = $row[0];
            $location{$i} = $row[1];
            # The dataset name is passed as an argument rather than being
            # interpolated into the format, so a '%' in it is printed as is.
            printf "(%2d) %s\n", $i, $row[2];
            print " (";
            print color ("green") if $row[5] eq "present";
            print color ("bold yellow") if $row[5] eq "submitted";
            print color ("bold red") if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
            print $row[5];
            print color ("reset");
            print ") created by $row[3] on $row[4]\n";
          }
        print "\nWhich entry would you like to use?: ";
        my $response = <STDIN>;
        $response = "" if !defined $response;  # end of input counts as an invalid choice
        $response =~ s/[ \t\n]//g;
        if (!(exists $workingDir{$response}))
          {
            print "Your selection \"$response\" was not a valid option! Quitting.\n";
            exit;
          }
        $crabDir = "$workingDir{$response}/ntuple";
        $outputDir = $location{$response};
      }
  }

if (!(-d $outputDir))
  {
    print "Output directory $outputDir does not exist!\n";
    exit;
  }

# One listing serves both the filename-based scan (dumb mode) and the
# cleaning loop; nothing modifies the directory in between.
my @crabOutput = listDirectory ($outputDir);

# Maps a job number to what must survive for that job: the exact output file
# name (normal mode, as filled in by parseCrabDir) or the highest submission
# number seen among the file names (dumb mode).
my %filesToKeep;
if (!$opt{"dumb"})
  {
    if (!(-d $crabDir))
      {
        print "CRAB directory $crabDir does not exist!\n";
        exit;
      }
    parseCrabDir ($crabDir, \%filesToKeep);
  }
else
  {
    foreach my $file (@crabOutput)
      {
        my ($jobNumber, $submissionNumber) = parseOutputFileName ($file) or next;
        if (!(defined $filesToKeep{$jobNumber})
            || $filesToKeep{$jobNumber} < $submissionNumber)
          {
            $filesToKeep{$jobNumber} = $submissionNumber;
          }
      }
  }

print "Before cleaning: " . scalar (@crabOutput) . " files\n";
foreach my $file (@crabOutput)
  {
    my ($jobNumber, $submissionNumber) = parseOutputFileName ($file) or next;
    if (!(defined $filesToKeep{$jobNumber}))
      {
        print "No information found for job $jobNumber!\n";
      }
    elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber)
           || (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file))
      {
        # List-form system () bypasses the shell, so paths containing spaces
        # or shell metacharacters are handed to rm unchanged.  Without
        # --force, rm -i asks before each removal.
        my $rmFlag = $opt{"force"} ? "-f" : "-i";
        system ("rm", $rmFlag, "--", "$outputDir/$file");
      }
  }

my @remainingOutput = listDirectory ($outputDir);
print "After cleaning: " . scalar (@remainingOutput) . " files\n";

# Splits an output file name of the form
# <prefix>_<job>_<submission>_<suffix>.root into its job and submission
# numbers.  Returns the pair (job, submission), or an empty list when the
# name does not follow that pattern.
sub
parseOutputFileName
{
  my $file = shift;

  return ($1, $2) if $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/;
  return;
}

# Returns the names of all entries in the given directory except "." and
# "..".  Dies if the directory cannot be opened.
sub
listDirectory
{
  my $dir = shift;

  opendir (my $handle, $dir) or die "Failed to open directory $dir: $!, stopped";
  my @entries = grep { $_ ne "." && $_ ne ".." } readdir ($handle);
  closedir ($handle);

  return @entries;
}
140 ahart 1.1
# Prints the usage summary and the description of every option to standard
# output, then terminates the script.
sub
printHelp
{
  # Reduce the invocation path to the bare script name.
  (my $exeName = $0) =~ s{^.*/([^/]*)$}{$1};

  my @synopsis = (
    "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n",
    " or: $exeName -d [OPTION]... OUTPUT_DIR\n",
    " or: $exeName -s [OPTION]... DATASET\n",
    "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
    "contained in the CRAB working directory provided.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  print $_ foreach @synopsis;

  # Each row is an option column (padded to 29 characters) followed by its
  # description; continuation rows carry a blank option column.
  my @optionRows = (
    [" -d, --dumb", "tries to use the filenames of the output instead of"],
    [" ", "the results from the CRAB working directory; useful"],
    [" ", "if the working directory has been lost"],
    [" -f, --force", "never prompt (default is to prompt before any"],
    [" ", "removal)"],
    [" -h, --help", "print this help message"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ", "of a directory"],
  );
  foreach my $row (@optionRows)
    {
      my ($option, $description) = @$row;
      printf "%-29s%s\n", $option, $description;
    }

  exit;
}
165 ahart 1.2
# Scans the framework job reports (res/crab_fjr_<job>.xml) of a CRAB working
# directory and records, for every job that finished successfully, the name
# of the output file that job produced.
#
# Arguments:
#   $crabDir     - path of the CRAB working directory (the one holding res/)
#   $filesToKeep - hash reference, filled as { job number => output file name }
#
# Jobs whose first reported ExitStatus is nonzero are announced on standard
# output and left out of the hash, as are jobs whose report names no output
# file.  The reports are read directly instead of through grep in a shell, so
# working directories whose path contains spaces or shell metacharacters are
# handled correctly.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resDir = "$crabDir/res";
  my $resHandle;
  if (!opendir ($resHandle, $resDir))
    {
      print "Could not open CRAB results directory $resDir: $!\n";
      return;
    }
  my @crabResults = readdir ($resHandle);
  closedir ($resHandle);

  for my $file (@crabResults)
    {
      next if !($file =~ m/^crab_fjr_([^\.]*)\.xml$/);
      my $jobNumber = $1;

      my $report;
      if (!open ($report, "<", "$resDir/$file"))
        {
          print "Could not read job report $resDir/$file: $!. Skipping.\n";
          next;
        }
      my @lines = <$report>;
      close ($report);

      # The first line mentioning ExitStatus decides whether the job
      # succeeded.  A report without any such line is treated as successful,
      # as is a status that is not a number.
      my ($statusLine) = grep { m/ExitStatus/ } @lines;
      my $wrapperStatus = defined $statusLine ? $statusLine : "";
      $wrapperStatus =~ s/^.*ExitStatus="([^"]*)".*$/$1/;
      $wrapperStatus =~ s/\n//g;
      if ($wrapperStatus ne "" && $wrapperStatus != 0)
        {
          print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
          next;
        }

      # The output location is on the line following the first <SurlForGrid>
      # tag.
      my $fileName;
      for my $i (0 .. $#lines)
        {
          next if !($lines[$i] =~ m/<SurlForGrid>/);
          $fileName = $lines[$i + 1];
          last;
        }
      next if !defined $fileName;

      # Trim surrounding whitespace before taking the base name: a stray
      # trailing blank or a missing final newline would otherwise leave a
      # name that matches no real file, and the caller would then remove
      # every output file of this job, including the good one.
      $fileName =~ s/^\s+|\s+$//g;
      $fileName =~ s/^.*\///;
      next if $fileName eq "";

      $filesToKeep->{$jobNumber} = $fileName;
    }
}
181     my $wrapperStatus = $status[0];
182     $wrapperStatus =~ s/^.*ExitStatus="([^"]*)".*$/$1/;
183 ahart 1.3 $wrapperStatus =~ s/\n//g;
184 ahart 1.2 if ($wrapperStatus != 0)
185     {
186     print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
187     next;
188     }
189     my @surlForGrid = `grep -A 1 '<SurlForGrid>' $crabDir/res/$file`;
190     my $fileName = $surlForGrid[1];
191     $fileName =~ s/^.*\/([^\/]*)\n/$1/;
192     $filesToKeep->{$jobNumber} = $fileName;
193     }
194     }