ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/OSUT3Analysis/DBTools/scripts/crabclean
Revision: 1.6
Committed: Fri May 10 00:24:02 2013 UTC (11 years, 11 months ago) by ahart
Branch: MAIN
CVS Tags: V02-01-00
Changes since 1.5: +4 -1 lines
Log Message:
Loosened the criteria for matching the given dataset name to a database entry so that it behaves like osudb or osusub.

File Contents

# Content
1 #!/usr/bin/env perl
2
3 use strict;
4 use Getopt::Long;
5 use Mysql;
6
7 sub printHelp;
8 sub parseCrabDir;
9
# Parse the command line first so that --help and usage errors never depend
# on the database being reachable.  An unrecognized option prints the usage
# text and exits instead of letting a destructive run continue.
my %opt;
Getopt::Long::Configure ("bundling");
GetOptions (\%opt, "dumb|d", "force|f", "dataset|s", "help|h") or printHelp ();

# One positional argument is always required; a second one (OUTPUT_DIR) is
# required only when neither -d nor -s is given.
printHelp () if $opt{"help"} || !$ARGV[0] || (!$opt{"dumb"} && !$opt{"dataset"} && !$ARGV[1]);

# The database is only queried when a dataset name is supplied (-s), so the
# connection is opened only in that mode.
our $db;
if ($opt{"dataset"})
{
  $db = Mysql->connect ("cmshead.mps.ohio-state.edu", "ntuple", "osuT3User")
    or die "Failed to connect to Tier 3, stopped";
}

# The first argument is CRAB_DIR, OUTPUT_DIR (with -d) or DATASET (with -s);
# seed all three from it and let the mode decide which ones are meaningful.
my $crabDir = $ARGV[0];
my $outputDir = $ARGV[0];
my $dataset = $ARGV[0];
$outputDir = $ARGV[1] if !$opt{"dumb"} && !$opt{"dataset"};
21
# With -s the positional argument is a dataset name: look it up in the ntuple
# database to obtain the CRAB working directory and the output location.
if ($opt{"dataset"})
{
  # Turn shell-style "*" wildcards into SQL "%" and match the name anywhere
  # inside the stored dataset name.
  my $queryDataset = $dataset;
  $queryDataset =~ s/\*/%/g;
  $queryDataset = "%$queryDataset%";

  # Let the driver quote the pattern so that quotes or backslashes in the
  # user-supplied name cannot break out of the SQL string literal.
  my $quotedDataset = $db->quote ($queryDataset);
  my $query = "select workingDirectory,location,dataset,user,creationTime from ntuple where dataset like $quotedDataset order by creationTime";
  $db->selectdb ("ntuple");
  my $results = $db->query ($query)
    or die "Database query failed, stopped";

  my $nRows = $results->numrows ();
  if ($nRows == 0)
  {
    print "Database entry does not exist.\n";
    exit;
  }
  elsif ($nRows == 1)
  {
    my @row = $results->fetchrow ();
    $crabDir = "$row[0]/ntuple";
    $outputDir = $row[1];
  }
  else
  {
    # Several entries match: list them (numbered from 1) and ask the user
    # which one to use.
    my %workingDir;
    my %location;
    print "Found multiple database entries matching\n";
    print "\"$dataset\":\n";
    for my $i (1 .. $nRows)
    {
      my @row = $results->fetchrow ();
      $workingDir{$i} = $row[0];
      $location{$i} = $row[1];
      # The dataset name is passed as an argument, never as part of the
      # format, so a "%" in it cannot be misread as a conversion.
      printf "(%2d) %s\n", $i, $row[2];
      print " created by $row[3] on $row[4]\n";
    }
    print "\nWhich entry would you like to use?: ";
    my $response = <STDIN>;
    $response = "" if !defined $response;  # EOF on STDIN counts as no selection
    $response =~ s/[ \t\n]//g;
    if (!(exists $workingDir{$response}))
    {
      print "Your selection \"$response\" was not a valid option! Quitting.\n";
      exit;
    }
    $crabDir = "$workingDir{$response}/ntuple";
    $outputDir = $location{$response};
  }
}
67
# Validate the output directory and take a single snapshot of its contents;
# the same listing is used to decide what to keep and what to delete.
if (!(-d $outputDir))
{
  print "Output directory $outputDir does not exist!\n";
  exit;
}
opendir (my $outputHandle, $outputDir)
  or die "Failed to read output directory $outputDir: $!, stopped";
my @crabOutput = readdir ($outputHandle);
closedir ($outputHandle);

# Maps a job number to what should be kept for that job: the output file name
# in normal mode, or the highest submission number seen in dumb mode.
my %filesToKeep;
if (!$opt{"dumb"})
{
  if (!(-d $crabDir))
  {
    print "CRAB directory $crabDir does not exist!\n";
    exit;
  }
  parseCrabDir ($crabDir, \%filesToKeep);
}
else
{
  foreach my $file (@crabOutput)
  {
    # Only names ending in _<a>_<b>_<c>.root are considered; <a> is used as
    # the job number and <b> as the submission number.  Everything else,
    # including "." and "..", is ignored.
    my ($jobNumber, $submissionNumber) = $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/
      or next;
    if (!(defined $filesToKeep{$jobNumber})
        || $filesToKeep{$jobNumber} < $submissionNumber)
    {
      $filesToKeep{$jobNumber} = $submissionNumber;
    }
  }
}
# Walk the directory snapshot and remove every matching .root file that is
# not the one recorded in %filesToKeep for its job.
foreach my $file (@crabOutput)
{
  # Only names ending in _<a>_<b>_<c>.root are considered; <a> is used as the
  # job number and <b> as the submission number.  Everything else, including
  # "." and "..", is left alone.
  my ($jobNumber, $submissionNumber) = $file =~ m/^.*_([^_]*)_([^_]*)_[^_]*\.root$/
    or next;

  if (!(defined $filesToKeep{$jobNumber}))
  {
    print "No information found for job $jobNumber!\n";
    next;
  }

  # Dumb mode stores a submission number per job; normal mode stores the full
  # file name that should be kept.
  my $isExcess = $opt{"dumb"}
    ? $filesToKeep{$jobNumber} != $submissionNumber
    : $filesToKeep{$jobNumber} ne $file;
  next if !$isExcess;

  # List-form system() bypasses the shell, so spaces or metacharacters in the
  # path cannot split the argument or inject commands.  Without --force, rm -i
  # still prompts on the inherited terminal.
  my $rmFlag = $opt{"force"} ? "-f" : "-i";
  system ("rm", $rmFlag, "--", "$outputDir/$file");
}
122
# Prints the usage summary and the option table to standard output, then
# terminates the script.  Takes no arguments and never returns.
sub
printHelp
{
  # Reduce the invocation path to the bare script name for the usage lines.
  (my $exeName = $0) =~ s/^.*\/([^\/]*)$/$1/;

  my @synopsis = (
    "Usage: $exeName [OPTION]... CRAB_DIR OUTPUT_DIR\n",
    " or: $exeName -d [OPTION]... OUTPUT_DIR\n",
    " or: $exeName -s [OPTION]... DATASET\n",
    "Deletes excess EDM output caused by multiple CRAB submissions using the results\n",
    "contained in the CRAB working directory provided.\n",
    "\n",
    "Mandatory arguments to long options are mandatory for short options too.\n",
  );
  print $_ for @synopsis;

  # Each row is [option column, description]; continuation rows carry a
  # blank option column so that the descriptions stay aligned.
  my @optionRows = (
    [" -d, --dumb", "tries to use the filenames of the output instead of"],
    [" ", "the results from the CRAB working directory; useful"],
    [" ", "if the working directory has been lost"],
    [" -f, --force", "never prompt (default is to prompt before any"],
    [" ", "removal)"],
    [" -h, --help", "print this help message"],
    [" -s, --dataset", "tells the script to expect a dataset name instead"],
    [" ", "of a directory"],
  );
  for my $row (@optionRows)
  {
    printf "%-29s%s\n", $row->[0], $row->[1];
  }

  exit;
}
147
# Scans the job reports in <crabDir>/res (files named crab_fjr_<job>.xml) and
# records, for every job that finished with exit status 0, the name of the
# output file it produced.
#
# Arguments:
#   $crabDir      path of the CRAB working directory
#   $filesToKeep  hash reference filled in as { job number => file name }
#
# A job is skipped (nothing is recorded for it) when its report cannot be
# read, carries no ExitStatus attribute, reports a non-zero exit status, or
# names no output file.  Returns nothing.
sub
parseCrabDir
{
  my $crabDir = shift;
  my $filesToKeep = shift;

  my $resDir = "$crabDir/res";
  my $dirHandle;
  if (!opendir ($dirHandle, $resDir))
  {
    warn "Could not read $resDir: $!\n";
    return;
  }
  my @crabResults = readdir ($dirHandle);
  closedir ($dirHandle);

  for my $file (@crabResults)
  {
    my ($jobNumber) = $file =~ m/^crab_fjr_([^\.]*)\.xml$/
      or next;

    # Read the report directly instead of shelling out to grep, so that paths
    # containing spaces or shell metacharacters are handled correctly.
    my $reportHandle;
    if (!open ($reportHandle, "<", "$resDir/$file"))
    {
      warn "Could not read $resDir/$file: $!\n";
      next;
    }
    my $wrapperStatus;   # value of the first ExitStatus="..." attribute found
    my $fileName;        # the line following the first <SurlForGrid> tag
    my $sawSurlTag = 0;
    while (my $line = <$reportHandle>)
    {
      if (!defined $wrapperStatus && $line =~ m/^.*ExitStatus="([^"]*)"/)
      {
        $wrapperStatus = $1;
      }
      if ($sawSurlTag && !defined $fileName)
      {
        $fileName = $line;
      }
      elsif (!$sawSurlTag && index ($line, "<SurlForGrid>") >= 0)
      {
        $sawSurlTag = 1;
      }
    }
    close ($reportHandle);

    # Without an exit status the job's outcome is unknown; do not treat it as
    # a success.
    if (!defined $wrapperStatus)
    {
      print "Job $jobNumber has no exit status in its job report. Skipping.\n";
      next;
    }
    if ($wrapperStatus != 0)
    {
      print "Job $jobNumber failed (return value $wrapperStatus). Skipping.\n";
      next;
    }

    next if !defined $fileName;
    # Keep only what follows the last slash and drop surrounding whitespace,
    # so that the result can be compared with a directory entry name.
    $fileName =~ s/^.*\///;
    $fileName =~ s/^\s+|\s+$//g;
    # An empty name would make every file of this job look like excess.
    next if $fileName eq "";

    $filesToKeep->{$jobNumber} = $fileName;
  }
  return;
}