ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.8
Committed: Thu Jun 20 00:43:22 2013 UTC (11 years, 10 months ago) by ahart
Branch: MAIN
CVS Tags: V02-03-02, V02-03-01, V02-03-00
Changes since 1.7: +4 -2 lines
Log Message:
Now correctly handles FJRs from jobs with non-EDM output.

File Contents

# User Rev Content
#!/usr/bin/env perl

# crabclean: deletes excess ROOT output files left behind when CRAB jobs are
# submitted more than once, keeping a single output file per job.
#
# Three modes of operation (see printHelp):
#   default  - CRAB_DIR OUTPUT_DIR: the file to keep for each job is read from
#              the framework job reports via parseCrabDir.
#   -d       - OUTPUT_DIR only: the file to keep is guessed from the file names
#              (*_<job>_<submission>_<suffix>.root), keeping the highest
#              submission number for each job.
#   -s       - DATASET: the CRAB working directory and the output directory
#              are looked up in the "ntuple" database.

use strict;
use Getopt::Long;
use Mysql;
use Term::ANSIColor;

sub printHelp;
sub parseCrabDir;
sub listDirectory;

# Database handle.  It is only connected when --dataset is given, so that the
# other modes (and --help) keep working when the database is unreachable.
our $db;

my %opt;
Getopt::Long::Configure ("bundling");
# An unrecognized option must not fall through to file deletion.
GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h") or printHelp ();

printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};

if ($opt{"dataset"})
{
  $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User") or die "Failed to connect to Tier 3, stopped";

  # "*" on the command line acts as the SQL wildcard, and the name is matched
  # as a substring.  The value is quoted by the driver rather than being
  # pasted into the statement verbatim.
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";
  my $query = "select workingDirectory,location,dataset,user,creationTime,status from ntuple where dataset like " . $db->quote ($queryDataset) . " order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query) or die "Database query failed: " . $db->errmsg () . ", stopped";
  my $nRows = $results->numrows ();
  if ($nRows == 0)
  {
    print "Database entry does not exist.\n";
    exit;
  }
  elsif ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    $crabDir = "$row[0]/ntuple";
    $outputDir = $row[1];
  }
  else
  {
    # Several matches: list them and let the user choose one by number.
    my %workingDir;
    my %location;
    print "Found multiple database entries matching\n";
    print "\"$dataset\":\n";
    for (my $i = 1; $i <= $nRows; $i++)
    {
      my @row = $results->fetchrow ();
      # Keep only the part of the creation time before the first space.
      $row[4] =~ s/([^ ]*) [^ ]*/$1/g;
      $workingDir{"$i"} = $row[0];
      $location{"$i"} = $row[1];
      # The dataset name is passed as an argument, never as part of the
      # format, so a "%" in it cannot corrupt the output.
      printf "(%2d) %s\n", $i, $row[2];
      print " (";
      print color "green" if $row[5] eq "present";
      print color "bold yellow" if $row[5] eq "submitted";
      print color "bold red" if $row[5] eq "created" or $row[5] eq "cancelled" or $row[5] eq "deprecated";
      print $row[5];
      print color "reset";
      print ") created by $row[3] on $row[4]\n";
    }
    print "\nWhich entry would you like to use?: ";
    my $response = <STDIN>;
    $response = "" if !defined $response; # end of input: treat as an invalid choice
    $response =~ s/[ \t\n]//g;
    if (!(exists $workingDir{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit;
    }
    $crabDir = "$workingDir{$response}/ntuple";
    $outputDir = $location{$response};
  }
}

if (!(-d $outputDir))
{
  print "Output directory $outputDir does not exist!\n";
  exit;
}
my @crabOutput = listDirectory ($outputDir);

# Maps job number => file name to keep (default mode)
#              or => highest submission number seen (dumb mode).
my %filesToKeep;
if (!$opt{"dumb"})
{
  if (!(-d $crabDir))
  {
    print "CRAB directory $crabDir does not exist!\n";
    exit;
  }
  parseCrabDir ($crabDir, \%filesToKeep);
}
else
{
  foreach my $file (@crabOutput)
  {
    next if ($file eq "." || $file eq "..");
    my ($jobNumber, $submissionNumber) = $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/
      or next;
    if (!(defined $filesToKeep{$jobNumber})
        || $filesToKeep{$jobNumber} < $submissionNumber)
    {
      $filesToKeep{$jobNumber} = $submissionNumber;
    }
  }
}

# The counts exclude the "." and ".." entries returned by readdir.
my $nFiles = @crabOutput - 2;
print "Before cleaning: $nFiles files\n";
foreach my $file (@crabOutput)
{
  next if ($file eq "." || $file eq "..");
  my ($jobNumber, $submissionNumber) = $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/
    or next;
  if (!(defined $filesToKeep{$jobNumber}))
  {
    print "No information found for job $jobNumber!\n";
  }
  elsif (($opt{"dumb"} && $filesToKeep{$jobNumber} != $submissionNumber)
         || (!$opt{"dumb"} && $filesToKeep{$jobNumber} ne $file))
  {
    # List-form system bypasses the shell, so spaces or metacharacters in the
    # path cannot change what gets removed; "--" protects names starting
    # with "-".  Without --force, rm prompts before each removal.
    system ("rm", ($opt{"force"} ? "-f" : "-i"), "--", "$outputDir/$file");
  }
}
@crabOutput = listDirectory ($outputDir);
$nFiles = @crabOutput - 2;
print "After cleaning: $nFiles files\n";

# Returns the names of all entries (including "." and "..") of the directory
# given as the only argument; dies if the directory cannot be opened.
sub
listDirectory
{
  my $dir = shift;

  opendir (my $handle, $dir) or die "Failed to open directory $dir: $!, stopped";
  my @entries = readdir ($handle);
  closedir ($handle);

  return @entries;
}
140 ahart 1.1
# Prints the usage summary and the description of every option to standard
# output, then terminates the program.  Takes no arguments and never returns.
sub
printHelp
{
  # Show only the last path component of the name the script was invoked as.
  (my $program = $0) =~ s/^.*\/([^\/]*)$/$1/;

  my @summary = (
    "Usage: $program [OPTION]... CRAB_DIR OUTPUT_DIR\n",
    " or: $program -d [OPTION]... OUTPUT_DIR\n",
    " or: $program -s [OPTION]... DATASET\n",
    "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
    "contained in the CRAB working directory provided.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  foreach my $line (@summary)
  {
    print $line;
  }

  # One [left column, description] pair per output line; continuation lines
  # of a description carry only padding in the left column.
  my @optionRows = (
    [" -d, --dumb", "tries to use the filenames of the output instead of"],
    [" ", "the results from the CRAB working directory; useful"],
    [" ", "if the working directory has been lost"],
    [" -f, --force", "never prompt (default is to prompt before any"],
    [" ", "removal)"],
    [" -h, --help", "print this help message"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ", "of a directory"],
  );
  foreach my $row (@optionRows)
  {
    printf "%-29s%s\n", $row->[0], $row->[1];
  }

  exit;
}
165 ahart 1.2
# Reads the framework job reports (res/crab_fjr_<job>.xml) of a CRAB working
# directory and records, for every job that did not report a non-zero exit
# status, the name of the output file named in the report.
#
# Arguments:
#   $crabDir     - path of the CRAB working directory (the one containing
#                  "res")
#   $filesToKeep - reference to a hash that receives job number => file name
#
# Jobs whose first ExitStatus attribute is non-zero are reported on standard
# output and skipped.  Jobs whose report cannot be read, or contains no
# <SurlForGrid element, get no entry in the hash.  Returns nothing.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resultsDir = "$crabDir/res";
  my $dirHandle;
  if (!opendir ($dirHandle, $resultsDir))
  {
    print "Failed to open $resultsDir: $!\n";
    return;
  }
  my @crabResults = readdir ($dirHandle);
  closedir ($dirHandle);

  for my $file (@crabResults)
  {
    next if !($file =~ m/^crab_fjr_([^\.]*)\.xml$/);
    my $jobNumber = $1;

    # The report is read directly instead of shelling out to grep, so paths
    # containing spaces or shell metacharacters are handled correctly.
    my $report;
    if (!open ($report, "<", "$resultsDir/$file"))
    {
      print "Failed to read $resultsDir/$file: $!. Skipping.\n";
      next;
    }
    my @lines = <$report>;
    close ($report);

    # Only the first line mentioning ExitStatus is considered.  A report
    # without one is not treated as a failure.
    my ($statusLine) = grep { index ($_, "ExitStatus") >= 0 } @lines;
    my $wrapperStatus = "";
    if (defined $statusLine && $statusLine =~ m/^.*ExitStatus="([^"]*)"/)
    {
      $wrapperStatus = $1;
    }
    if ($wrapperStatus ne "" && $wrapperStatus != 0)
    {
      print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
      next;
    }

    # Locate the first <SurlForGrid element.
    my $surlIndex;
    for my $i (0 .. $#lines)
    {
      if (index ($lines[$i], "<SurlForGrid") >= 0)
      {
        $surlIndex = $i;
        last;
      }
    }
    next if !defined $surlIndex;

    # The location is either on the element's own line, as a Value="..."
    # attribute, or on the line following the opening tag.  The variable is
    # declared exactly once: a "my" guarded by a statement modifier has
    # undefined behaviour and left the name empty in the second layout.
    my $fileName = ($lines[$surlIndex] =~ m/Value=/) ? $lines[$surlIndex]
                                                     : $lines[$surlIndex + 1];
    next if !defined $fileName;

    # Reduce the location to the part after its last "/".
    $fileName =~ s/^.*\/([^\/]*)\n/$1/ if !($fileName =~ m/Value=/);
    $fileName =~ s/^.*\/([^\/]*)"\/>\n/$1/;
    $filesToKeep->{$jobNumber} = $fileName;
  }

  return;
}