ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/JOBROBOT/TaskProfile
Revision: 1.2
Committed: Wed Oct 12 16:55:56 2005 UTC (19 years, 6 months ago) by lat
Branch: MAIN
CVS Tags: PHEDEX_V2_2_20051025
Changes since 1.1: +12 -5 lines
Log Message:
Report understandable job status name, and include more detail for running jobs.

File Contents

# User Rev Content
1 lat 1.1 #!/usr/bin/env perl
2    
3     ##H This drop box agent reports status of submitted jobs by maintaining
4     ##H a simple CSV file. It maintains the counts by site, job type, the
5     ##H owner/dataset, job submission status, and job exit code.
6     ##H
7     ##H Usage:
8     ##H TaskProfile
9     ##H -state DIRECTORY [-next NEXT] [-wait SECS]
10     ##H -taskrepo DIRECTORY
11     ##H
12     ##H -state agent state directory, including inbox
13     ##H -next next agent to pass the drops to; can be given several times
14     ##H -wait time to wait in seconds between work scans
15     ##H -taskrepo directory with all the tasks in it
16    
# Compile-time setup: record the script name and put the shared toolkit
# directory on @INC so that the "use Utils*" statements below can be resolved.
BEGIN {
    use strict; use warnings; $^W=1;

    # $me is the base name of the running script (everything up to the
    # last "/" removed).
    our $me = $0; $me =~ s|.*/||;

    # $home starts as the directory part of $0.  When $0 has no directory
    # part at all (script started as "perl TaskProfile" from its own
    # directory) there is nothing to strip, so fall back to "." instead of
    # treating the script name itself as a directory.
    our $home = $0;
    if ($home =~ m|/|)
    {
        $home =~ s|/[^/]+$||;
        $home ||= ".";
    }
    else
    {
        $home = ".";
    }

    # The toolkit modules are looked up two levels above the script
    # directory, under Toolkit/Common.
    $home .= "/../../Toolkit/Common";
    unshift(@INC, $home);
}
23    
24     ######################################################################
25     use UtilsHelp;
# Parse the command line into %args, validate it, then create the agent
# and hand control to it.
#
# Recognised options (each value-taking option needs a following argument):
#   -state DIR     stored as DROPDIR
#   -next DIR      appended to the NEXTDIR list; may be repeated
#   -wait SECS     stored as WAITTIME
#   -taskrepo DIR  stored as TASKREPO
#   -h             calls usage() from UtilsHelp
#                  NOTE(review): usage() is assumed not to return; if it
#                  did, this loop would spin because -h is never shifted.
# Parsing stops at the first argument that is not one of the above.
my %args;
while (scalar @ARGV)
{
    if ($ARGV[0] eq '-state' && scalar @ARGV > 1)
    { shift (@ARGV); $args{DROPDIR} = shift(@ARGV); }
    elsif ($ARGV[0] eq '-next' && scalar @ARGV > 1)
    { shift (@ARGV); push (@{$args{NEXTDIR}}, shift(@ARGV)); }
    elsif ($ARGV[0] eq '-wait' && scalar @ARGV > 1)
    { shift (@ARGV); $args{WAITTIME} = shift(@ARGV); }
    elsif ($ARGV[0] eq '-taskrepo' && scalar @ARGV > 1)
    { shift (@ARGV); $args{TASKREPO} = shift(@ARGV); }
    elsif ($ARGV[0] eq '-h')
    { usage(); }
    else
    { last; }
}

# Anything left over in @ARGV was not understood; -state and -taskrepo
# are mandatory.
if (@ARGV || !$args{DROPDIR} || !$args{TASKREPO})
{
    die "Insufficient parameters, use -h for help.\n";
}

# Direct method-call syntax instead of the ambiguous indirect-object form.
TaskProfile->new(%args)->process();
48    
49     ######################################################################
50     # Routines specific to this agent.
51     package TaskProfile; use strict; use warnings; use base 'UtilsAgent';
52     use UtilsCommand;
53     use UtilsLogging;
54     use UtilsTiming;
55    
# Constructor.  Builds the object through the parent class constructor,
# passing it every argument unchanged, then copies this agent's own
# parameters from the argument list onto the object.
#
# Agent-specific parameters:
#   TASKREPO  task base directory (defaults to undef)
sub new
{
    my ($proto, @ctor_args) = @_;
    my $class = ref($proto) || $proto;
    my $self = $class->SUPER::new(@ctor_args);

    my %defaults = (TASKREPO => undef); # task base directory
    my %given = (@ctor_args);
    foreach my $name (keys %defaults)
    {
        $self->{$name} = $given{$name} || $defaults{$name};
    }

    return bless $self, $class;
}
67    
# Count the jobs recorded in the task repository.
#
# Walks $self->{TASKREPO} as <site>/<*>/<task directory>.  For each task
# directory it reads, from the first "crab_*" subdirectory (if any),
#   log/scheduler_id.log   one job id per line
#   share/scripts.list     one job per line; the second space-separated
#                          field is a one-letter job status
# and, from the task directory itself, JOB_STATUS_LOG.txt, of which only
# the part after the last "START <digit>" marker is used.  Line i of the
# two CRAB files is taken to describe the same job.
#
# Returns a hash reference of counts:
#   $result->{site}{apptype}{owner}{dataset}{status name}{exit code}
# where apptype, owner and dataset are parsed from the task directory
# name (empty strings if the name does not match the expected pattern),
# and exit code is '' when none is known.  Jobs whose scripts.list line
# carries no status field, or an unrecognised status letter, are skipped.
sub getSiteStatus
{
    my ($self) = @_;
    my $result = {};
    my $taskrepo = $self->{TASKREPO};
    my %statusname = ('X' => 'Initial', 'C' => 'Created', 'N' => 'No Input',
                      'S' => 'Submitted', 'P' => 'Pre-retrieve', 'R' => 'Retrieved',
                      'A' => 'Aborted', 'K' => 'Cancelled');

    foreach my $site (glob("$taskrepo/*"))
    {
        my ($sitename) = ($site =~ m|.*/(.*)|);
        foreach my $taskdir (glob("$site/*/*"))
        {
            # Match components from name SC3.FNAL.ExSimHitStatistics.jm03b_qcd_20_30.jm_Hit245_2_g133.1
            my ($apptype, $owner, $dataset) = ($taskdir =~ m!.*/SC3\.[^.]+\.([^.]+)\.(\S+)\.([^.]+)\.\d+!);

            # A directory name that does not match leaves these undefined;
            # use empty strings so they are valid hash keys without warnings.
            foreach my $part ($apptype, $owner, $dataset)
            {
                $part = '' if ! defined $part;
            }

            my $crabdir = (glob("$taskdir/crab_*"))[0];
            my (@jobids, @jobstat);
            if ($crabdir)
            {
                @jobids = split(/\n/, &input("$crabdir/log/scheduler_id.log") || '');
                @jobstat = split(/\n/, &input("$crabdir/share/scripts.list") || '');
            }

            # Keep only the most recent status report: everything up to and
            # including the last "START <digit>" marker is discarded.
            my $crabinfo = &input("$taskdir/JOB_STATUS_LOG.txt") || '';
            $crabinfo =~ s/.*\nSTART \d//s;

            # Map job id => status text as last reported in the status log.
            my %crabstat = map { /^ JOB\s+\d+:\s+(\S+)\s+STATUS:\s+(.*)/ ? ($1 => $2) : () }
                           grep (/^ JOB/, split(/\n/, $crabinfo));

            for my $i (0 .. $#jobstat)
            {
                # Determine job status according to CRAB and from log.
                my $status = (split(/ /, $jobstat[$i]))[1];
                next if ! defined $status;      # malformed line, no status field

                my $statusname;
                my $exit = '';
                if ($status =~ /^[XCN]$/)
                {
                    # Job has not been submitted; there is no exit code.
                    $statusname = $statusname{$status};
                }
                elsif ($status =~ /^[RPAKS]$/)
                {
                    # Figure out what last job status said.  The job id may be
                    # missing if scheduler_id.log is shorter than scripts.list.
                    my $jobid = $jobids[$i];
                    my $jobstat = (defined $jobid && $crabstat{$jobid}) || '';
                    my ($stat, $code) = ($jobstat =~ /(\S+)(?:\s+EXIT_CODE:\s+(\d+))?/);
                    $exit = $code if defined $code;

                    # If CRAB says it was aborted, trust it
                    $status = 'A' if (defined $stat && $stat eq 'Aborted');
                    $statusname = $statusname{$status};

                    # For submitted jobs report the status word from the log
                    # instead of the generic "Submitted".
                    $statusname = $stat if (defined $stat && $status eq 'S');
                }
                else
                {
                    next;                       # unrecognised status letter
                }

                # Now record.
                $result->{$sitename}{$apptype}{$owner}{$dataset}{$statusname}{$exit}++;
            }
        }
    }

    return $result;
}
130    
# Write a CSV snapshot of the current job counts and then sleep.
#
# Collects the counts from getSiteStatus(), flattens them into one CSV
# row per (site, app, owner, dataset, status, exit code) combination,
# each stamped with the current time, and writes the result to
# "jobstatus.csv" in the agent's DROPDIR.  Rows are emitted in sorted key
# order so that successive snapshots are directly comparable.  Finally
# naps for WAITTIME.  Arguments after $self are ignored.
# NOTE(review): values are written verbatim; a comma inside a name would
# break the CSV -- presumably names never contain one, confirm.
sub idle
{
    my ($self) = @_;

    # Take a single snapshot of the counts and a single timestamp so all
    # rows of this report are consistent with each other.
    my $stats = $self->getSiteStatus ();
    my $now = time();

    my $output = "Time,Site,App,Owner,Dataset,Status,Exitcode,Count\n";
    foreach my $site (sort keys %$stats)
    {
        my $by_app = $stats->{$site};
        foreach my $app (sort keys %$by_app)
        {
            my $by_owner = $by_app->{$app};
            foreach my $owner (sort keys %$by_owner)
            {
                my $by_ds = $by_owner->{$owner};
                foreach my $ds (sort keys %$by_ds)
                {
                    my $by_status = $by_ds->{$ds};
                    foreach my $status (sort keys %$by_status)
                    {
                        my $by_exit = $by_status->{$status};
                        foreach my $exit (sort keys %$by_exit)
                        {
                            my $val = $by_exit->{$exit};
                            $output .= "$now,$site,$app,$owner,$ds,$status,$exit,$val\n";
                        }
                    }
                }
            }
        }
    }

    &output ("$self->{DROPDIR}/jobstatus.csv", $output);

    $self->nap ($self->{WAITTIME});
}