ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/JOBROBOT/TaskProfile
Revision: 1.5
Committed: Mon Apr 10 16:38:01 2006 UTC (19 years ago) by gutsche
Branch: MAIN
CVS Tags: JOBROBOT_1_0006, JOBROBOT_1_0005, JOBROBOT_1_0004_for_CRAB_1_2_0_cmssw_pre6, JOBROBOT_1_0003_for_CRAB_1_2_0_cmssw_pre6, JOBROBOT_1_0002_for_CRAB_1_1_0, JOBROBOT_1_0001, HEAD
Changes since 1.4: +2 -2 lines
Error occurred while calculating annotation data.
Log Message:
initial commit of CRAB Version 1 JobRobot

File Contents

# Content
1 #!/usr/bin/env perl
2
3 ##H This drop box agent reports status of submitted jobs by maintaining
4 ##H a simple CSV file. It maintains the counts by site, job type, the
5 ##H owner/dataset, job submission status, and job exit code.
6 ##H
7 ##H Usage:
8 ##H TaskProfile
9 ##H -state DIRECTORY [-next NEXT] [-wait SECS]
10 ##H -taskrepo DIRECTORY
11 ##H
12 ##H -state agent state directory, including inbox
13 ##H -next next agent to pass the drops to; can be given several times
14 ##H -wait time to wait in seconds between work scans
15 ##H -taskrepo directory with all the tasks in it
16
# Compile-time setup: derive the script's name ($me) and its directory from
# $0, then put "<script dir>/../PHEDEX/Toolkit/Common" at the front of @INC
# (presumably where the Utils* modules loaded below live -- confirm against
# the deployment layout).
BEGIN {
    # NOTE: these pragmas are lexically scoped to this BEGIN block only;
    # $^W is what switches warnings on for the rest of the program.
    use strict; use warnings; $^W = 1;

    # Script name with any leading directory components removed.
    our $me = $0; $me =~ s|.*/||;

    # Directory part of $0.  When $0 carries no directory at all (for
    # example "perl TaskProfile") use the current directory instead of
    # mistaking the script name itself for a directory.  A script sitting
    # directly under "/" yields an empty prefix, so the final path starts
    # with "/.." which still resolves to the root.
    our $home = ($0 =~ m|^(.*)/[^/]+$|s) ? $1 : ".";
    $home .= "/../PHEDEX/Toolkit/Common";
    unshift(@INC, $home);
}
23
24 ######################################################################
25 use UtilsHelp;
# Parse the command line into %args (see the ##H usage text at the top of
# the file).  Every option except -h needs a value after it; an option
# given without one, or any unknown word, stops the parsing and is left in
# @ARGV so that the check below rejects the command line.
my %args;
while (@ARGV)
{
    my $opt = $ARGV[0];
    my $has_value = (@ARGV > 1);

    if ($opt eq '-state' && $has_value)
    { shift(@ARGV); $args{DROPDIR} = shift(@ARGV); }
    elsif ($opt eq '-next' && $has_value)
    { shift(@ARGV); push(@{$args{NEXTDIR}}, shift(@ARGV)); }
    elsif ($opt eq '-wait' && $has_value)
    { shift(@ARGV); $args{WAITTIME} = shift(@ARGV); }
    elsif ($opt eq '-taskrepo' && $has_value)
    { shift(@ARGV); push(@{$args{TASKREPO}}, shift(@ARGV)); }
    elsif ($opt eq '-h')
    {
        # NOTE(review): -h is not shifted off, so this relies on usage()
        # (not defined in this file) not returning -- confirm.
        &usage();
    }
    else
    { last; }
}

# Leftover arguments or a missing -state / -taskrepo are fatal.
if (@ARGV || !$args{DROPDIR} || !$args{TASKREPO})
{
    die "Insufficient parameters, use -h for help.\n";
}

# Build the agent and hand control to its process() method.  Written as an
# explicit class-method call rather than the ambiguous indirect-object form
# "new TaskProfile (...)".
TaskProfile->new(%args)->process();
48
49 ######################################################################
50 # Routines specific to this agent.
51 package TaskProfile; use strict; use warnings; use base 'UtilsAgent';
52 use UtilsCommand;
53 use UtilsLogging;
54 use UtilsTiming;
55
# Constructor.  Delegates to the parent class constructor with the same
# key/value arguments, then stores this agent's own parameters on the
# object:
#   TASKREPO  task base directory (defaults to undef when absent or false)
# Returns the object blessed into the requested class.
sub new
{
    my ($proto, @init) = @_;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new(@init);

    my %defaults = (TASKREPO => undef); # task base directory
    my %given = @init;
    foreach my $key (keys %defaults)
    {
        $self->{$key} = $given{$key} || $defaults{$key};
    }

    return bless($self, $class);
}
67
# Tally the jobs found under the task repositories.
#
# Every directory matching TASKREPO/*/* is treated as a site (its last path
# component is the site name) and every directory matching SITE/*/* below
# it as a task.  For each task the per-job state letters are read from
# crab_*/share/scripts.list, the scheduler ids from
# crab_*/log/scheduler_id.log, and the latest "START" section of
# JOB_STATUS_LOG.txt supplies what CRAB last reported for each id.
#
# Returns a hash reference of job counts keyed as
#   {site}{apptype}{owner}{dataset}{status name}{exit code}
# where the exit code is '' when none is known.
sub getSiteStatus
{
    my ($self) = @_;
    my $result = {};
    my %statusname = ('X' => 'Initial', 'C' => 'Created', 'N' => 'No Input',
                      'S' => 'Submitted', 'P' => 'Pre-retrieve', 'R' => 'Retrieved',
                      'A' => 'Aborted', 'K' => 'Cancelled');

    foreach my $site (map { (<$_/*/*>) } @{$self->{TASKREPO}})
    {
        my ($sitename) = ($site =~ m|.*/(.*)|);
        foreach my $taskdir (<$site/*/*>)
        {
            # Match components from a name such as
            # SC3.FNAL.ExSimHitStatistics.jm03b_qcd_20_30.jm_Hit245_2_g133.1
            my ($apptype, $owner, $dataset) = ($taskdir =~ m!.*/SC3\.[^.]+\.([^.]+)\.(\S+)\.([^.]+)\.\d+!);

            # A directory that does not follow the naming scheme is still
            # counted, under empty names; make that explicit rather than
            # using undef as a hash key (which warns under $^W).
            foreach my $part ($apptype, $owner, $dataset)
            {
                $part = '' if ! defined $part;
            }

            # Only the first crab_* working directory is consulted; without
            # one there are no jobs to count for this task.
            my $crabdir = (<$taskdir/crab_*>)[0];
            my (@jobids, @jobstat);
            @jobids = split(/\n/, &input("$crabdir/log/scheduler_id.log") || '') if $crabdir;
            @jobstat = split(/\n/, &input("$crabdir/share/scripts.list") || '') if $crabdir;

            # Keep only what follows the last "START <digit>" marker, then
            # map each reported job id to the text after "STATUS:".
            my $crabinfo = &input("$taskdir/JOB_STATUS_LOG.txt") || '';
            $crabinfo =~ s/.*\nSTART \d//s;
            my %crabstat = map { /^ JOB\s+\d+:\s+(\S+)\s+STATUS:\s+(.*)/ ? ($1 => $2) : () }
                           grep (/^ JOB/, split(/\n/, $crabinfo));

            foreach my $i (0 .. $#jobstat)
            {
                # Determine job status according to CRAB and from log.
                # The state letter is the second space-separated field; a
                # line without one cannot be classified, so skip it.
                my $status = (split(/ /, $jobstat[$i]))[1];
                next if ! defined $status;

                my $statusname = $statusname{$status};
                if ($status =~ /^[XCN]$/)
                {
                    # Not submitted yet: there is no exit code to record.
                    $result->{$sitename}{$apptype}{$owner}{$dataset}{$statusname}{""}++;
                }
                elsif ($status =~ /^[RPAKS]$/)
                {
                    # Figure out what the last job status report said.
                    my $report = $crabstat{$jobids[$i] || ''} || '';
                    my ($stat, $exit) = ($report =~ /(\S+)(?:\s+EXIT_CODE:\s+(\d+))?/);
                    $exit = '' if ! defined $exit;

                    # If CRAB says it was aborted, trust it.
                    $status = 'A' if (defined $stat && $stat eq 'Aborted');
                    $statusname = $statusname{$status};

                    # For submitted jobs report CRAB's own status word.
                    $statusname = $stat if (defined $stat && $status eq 'S');

                    # Now record.
                    $result->{$sitename}{$apptype}{$owner}{$dataset}{$statusname}{$exit}++;
                }
            }
        }
    }

    return $result;
}
129
# Take one snapshot of the job counts and publish it as CSV.
#
# Calls getSiteStatus() once, flattens the nested counts into one row per
# (site, app, owner, dataset, status, exit code) combination, hands the
# text to output() for "<DROPDIR>/jobstatus.csv", and finally naps for
# WAITTIME.  All rows of a snapshot carry the same timestamp.  Any further
# arguments passed by the caller are ignored.
sub idle
{
    my ($self) = @_;

    my $stats = $self->getSiteStatus();
    my $now = time();

    # Keys are walked in sorted order so that the file content is
    # reproducible from run to run instead of following hash order.
    my $output = "Time,Site,App,Owner,Dataset,Status,Exitcode,Count\n";
    foreach my $site (sort keys %$stats)
    {
        my $apps = $stats->{$site};
        foreach my $app (sort keys %$apps)
        {
            my $owners = $apps->{$app};
            foreach my $owner (sort keys %$owners)
            {
                my $datasets = $owners->{$owner};
                foreach my $ds (sort keys %$datasets)
                {
                    my $statuses = $datasets->{$ds};
                    foreach my $status (sort keys %$statuses)
                    {
                        my $exits = $statuses->{$status};
                        foreach my $exit (sort keys %$exits)
                        {
                            my $val = $exits->{$exit};
                            $output .= "$now,$site,$app,$owner,$ds,$status,$exit,$val\n";
                        }
                    }
                }
            }
        }
    }

    &output ("$self->{DROPDIR}/jobstatus.csv", $output);

    $self->nap ($self->{WAITTIME});
}