1 |
|
2 |
###########################################################################
|
3 |
#
|
4 |
# H E L P F U N C T I O N S
|
5 |
#
|
6 |
###########################################################################
|
7 |
|
8 |
import common
|
9 |
|
10 |
import sys, os, string
|
11 |
|
12 |
import tempfile
|
13 |
|
14 |
###########################################################################
|
15 |
def usage():
    """Print the short command-line usage summary and terminate.

    The summary is prefixed with ``common.prog_name`` and written to
    standard output. The function never returns: it always ends the
    process through ``sys.exit(2)``.
    """
    usa_string = common.prog_name + """ [options]

The most useful general options (use '-h' to get complete help):

-create -- Create all the jobs.
-submit n -- Submit the first n available jobs. Default is all.
-status [range] -- check status of all jobs.
-getoutput|-get [range] -- get back the output of all jobs: if range is defined, only of selected jobs
-publish [dbs_url] -- after the getouput, publish the data user in a local DBS instance
-kill [range] -- kill submitted jobs
-resubmit [range] -- resubmit killed/aborted/retrieved jobs
-clean -- gracefully cleanup the directory of a task
-testJdl [range] -- check if resources exist which are compatible with jdl
-list [range] -- show technical job details
-postMortem [range] -- provide a file with information useful for post-mortem analysis of the jobs
-printId [range] -- print the job SID or Task Unique ID while using the server
-printJdl [range] -- provide files with a complete Job Description (JDL)
-continue|-c [dir] -- Apply command to task stored in [dir].
-h [format] -- Detailed help. Formats: man (default), tex, html, txt
-cfg fname -- Configuration file name. Default is 'crab.cfg'
-debug N -- set the verbosity level to N
-v -- Print version and exit

"range" has syntax "n,m,l-p" which correspond to [n,m,l,l+1,...,p-1,p] and all possible combination

Example:
crab -create -submit 1
"""
    # The parenthesised single-argument form prints exactly what the
    # Python 2 print statement did and is also valid Python 3.
    print(usa_string)
    sys.exit(2)
|
47 |
|
48 |
###########################################################################
|
49 |
def _help_pod_source():
    """Return the complete CRAB manual as POD source text.

    The program name, version string and author list are taken from the
    ``common`` module; each author entry contributes ``auth[0]`` (shown as
    the name) and ``auth[1]`` (shown between angle brackets).
    """
    help_string = """
=pod

=head1 NAME

B<CRAB>: B<C>ms B<R>emote B<A>nalysis B<B>uilder

"""+common.prog_name+""" version: """+common.prog_version_str+"""

This tool B<must> be used from an User Interface and the user is supposed to
have a valid Grid certificate.

=head1 SYNOPSIS

B<"""+common.prog_name+"""> [I<options>] [I<command>]

=head1 DESCRIPTION

CRAB is a Python program intended to simplify the process of creation and submission of CMS analysis jobs to the Grid environment .

Parameters for CRAB usage and configuration are provided by the user changing the configuration file B<crab.cfg>.

CRAB generates scripts and additional data files for each job. The produced scripts are submitted directly to the Grid. CRAB makes use of BossLite to interface to the Grid scheduler, as well as for logging and bookkeeping.

CRAB supports any CMSSW based executable, with any modules/libraries, including user provided ones, and deals with the output produced by the executable. CRAB provides an interface to CMS data discovery services (DBS and DLS), which are completely hidden to the final user. It also splits a task (such as analyzing a whole dataset) into smaller jobs, according to user requirements.

CRAB can be used in two ways: StandAlone and with a Server.
The StandAlone mode is suited for small task, of the order of O(100) jobs: it submits the jobs directly to the scheduler, and these jobs are under user responsibility.
In the Server mode, suited for larger tasks, the jobs are prepared locally and then passed to a dedicated CRAB server, which then interacts with the scheduler on behalf of the user, including additional services, such as automatic resubmission, status caching, output retrieval, and more.
The CRAB commands are exactly the same in both cases.

CRAB web page is available at

I<http://cmsdoc.cern.ch/cms/ccs/wm/www/Crab/>

=head1 HOW TO RUN CRAB FOR THE IMPATIENT USER

Please, read all the way through in any case!

Source B<crab.(c)sh> from the CRAB installation area, which have been setup either by you or by someone else for you.

Modify the CRAB configuration file B<crab.cfg> according to your need: see below for a complete list. A template and commented B<crab.cfg> can be found on B<$CRABDIR/python/crab.cfg>

~>crab -create
create all jobs (no submission!)

~>crab -submit 2 -continue [ui_working_dir]
submit 2 jobs, the ones already created (-continue)

~>crab -create -submit 2
create _and_ submit 2 jobs

~>crab -status
check the status of all jobs

~>crab -getoutput
get back the output of all jobs

~>crab -publish
publish all user outputs in the DBS specified in the crab.cfg (dbs_url_for_publication) or written as argument of this option

=head1 RUNNING CMSSW WITH CRAB

=over 4

=item B<A)>

Develop your code in your CMSSW working area. Do anything which is needed to run interactively your executable, including the setup of run time environment (I<eval `scramv1 runtime -sh|csh`>), a suitable I<ParameterSet>, etc. It seems silly, but B<be extra sure that you actually did compile your code> I<scramv1 b>.

=item B<B)>

Source B<crab.(c)sh> from the CRAB installation area, which have been setup either by you or by someone else for you. Modify the CRAB configuration file B<crab.cfg> according to your need: see below for a complete list.

The most important parameters are the following (see below for complete description of each parameter):

=item B<Mandatory!>

=over 6

=item B<[CMSSW]> section: datasetpath, pset, splitting parameters, output_file

=item B<[USER]> section: output handling parameters, such as return_data, copy_data etc...

=back

=item B<Run it!>

You must have a valid voms-enabled Grid proxy. See CRAB web page for details.

=back

=head1 HOW TO RUN ON CONDOR-G

The B<Condor-G> mode for B<CRAB> is a special submission mode next to the standard Resource Broker submission. It is designed to submit jobs directly to a site and not using the Resource Broker.

Due to the nature of B<Condor-G> submission, the B<Condor-G> mode is restricted to OSG sites within the CMS Grid, currently the 7 US T2: Florida(ufl.edu), Nebraska(unl.edu), San Diego(ucsd.edu), Purdue(purdue.edu), Wisconsin(wisc.edu), Caltech(ultralight.org), MIT(mit.edu).

=head2 B<Requirements:>

=over 2

=item installed and running local Condor scheduler

(either installed by the local Sysadmin or self-installed using the VDT user interface: http://www.uscms.org/SoftwareComputing/UserComputing/Tutorials/vdt.html)

=item locally available LCG or OSG UI installation

for authentication via Grid certificate proxies ("voms-proxy-init -voms cms" should result in valid proxy)

=item set the environment variable EDG_WL_LOCATION to the edg directory of the local LCG or OSG UI installation

=back

=head2 B<What the Condor-G mode can do:>

=over 2

=item submission directly to multiple OSG sites,

the requested dataset must be published correctly by the site in the local and global services.
Previous restrictions on submitting only to a single site have been removed. SE and CE whitelisting
and blacklisting work as in the other modes.

=back

=head2 B<What the Condor-G mode cannot do:>

=over 2

=item submit jobs if no condor scheduler is running on the submission machine

=item submit jobs if the local condor installation does not provide Condor-G capabilities

=item submit jobs to an LCG site

=item support Grid certificate proxy renewal via the myproxy service

=back

=head2 B<CRAB configuration for Condor-G mode:>

The CRAB configuration for the Condor-G mode only requires one change in crab.cfg:

=over 2

=item select condor_g Scheduler:

scheduler = condor_g

=back

=head1 COMMANDS

=over 4

=item B<-create>

Create the jobs: from version 1_3_0 it is only possible to create all jobs.
The maximum number of jobs depends on dataset and splitting directives. This set of identical jobs accessing the same dataset are defined as a task.
This command create a directory with default name is I<crab_0_date_time> (can be changed via ui_working_dir parameter, see below). Inside this directory it is placed whatever is needed to submit your jobs. Also the output of your jobs (once finished) will be place there (see after). Do not cancel by hand this directory: rather use -clean (see).
See also I<-continue>.

=item B<-submit [range]>

Submit n jobs: 'n' is either a positive integer or 'all' or a [range]. Default is all.
If 'n' is passed as argument, the first 'n' suitable jobs will be submitted. Please note that this is behaviour is different from other commands, where -command N means act the command to the job N, and not to the first N jobs. If a [range] is passed, the selected jobs will be submitted.
This option must be used in conjunction with -create (to create and submit immediately) or with -continue (which is assumed by default), to submit previously created jobs. Failure to do so will stop CRAB and generate an error message. See also I<-continue>.

=item B<-continue [dir] | -c [dir]>

Apply the action on the task stored on directory [dir]. If the task directory is the standard one (crab_0_date_time), the more recent in time is taken. Any other directory must be specified.
Basically all commands (but -create) need -continue, so it is automatically assumed. Of course, the standard task directory is used in this case.

=item B<-status>

Check the status of the jobs, in all states. All the info (e.g. application and wrapper exit codes) will be available only after the output retrieval.

=item B<-getoutput|-get [range]>

Retrieve the output declared by the user via the output sandbox. By default the output will be put in task working dir under I<res> subdirectory. This can be changed via config parameters. B<Be extra sure that you have enough free space>. See I<range> below for syntax.

=item B<-publish [dbs_url]>

Publish user output in a local DBS instance after retrieving of output. By default the publish uses the dbs_url_for_publication specified in the crab.cfg file, otherwise you can write it as argument of this option.

=item B<-resubmit [range]>

Resubmit jobs which have been previously submitted and have been either I<killed> or are I<aborted>. See I<range> below for syntax.
The resubmit option can be used only with CRAB without server. For the server this option will be implemented as soon as possible

=item B<-kill [range]>

Kill (cancel) jobs which have been submitted to the scheduler. A range B<must> be used in all cases, no default value is set.

=item B<-testJdl [range]>

Check if the job can find compatible resources. It's equivalent of doing I<edg-job-list-match> on edg.

=item B<-printId [range]>

Just print the job identifier, which can be the SID (Grid job identifier) of the job(s) or the taskId if you are using CRAB with the server or local scheduler Id.

=item B<-printJdl [range]>

Collect the full Job Description in a file located under share directory. The file base name is File- .

=item B<-postMortem [range]>

Try to collect more information of the job from the scheduler point of view.

=item B<-list [range]>

Dump technical information about jobs: for developers only.

=item B<-clean [dir]>

Clean up (i.e. erase) the task working directory after a check whether there are still running jobs. In case, you are notified and asked to kill them or retrieve their output. B<Warning> this will possibly delete also the output produced by the task (if any)!

=item B<-help [format] | -h [format]>

This help. It can be produced in four different I<format>: I<man> (default), I<tex>, I<html> and I<txt>.

=item B<-v>

Print the version and exit.

=item B<range>

The range to be used in many of the above commands has the following syntax. It is a comma separated list of jobs ranges, each of which may be a job number, or a job range of the form first-last.
Example: 1,3-5,8 = {1,3,4,5,8}

=back

=head1 OPTION

=over 4

=item B<-cfg [file]>

Configuration file name. Default is B<crab.cfg>.

=item B<-debug [level]>

Set the debug level: high number for high verbosity.

=back

=head1 CONFIGURATION PARAMETERS

All the parameter describe in this section can be defined in the CRAB configuration file. The configuration file has different sections: [CRAB], [USER], etc. Each parameter must be defined in its proper section. An alternative way to pass a config parameter to CRAB is via command line interface; the syntax is: crab -SECTION.key value . For example I<crab -USER.outputdir MyDirWithFullPath> .
The parameters passed to CRAB at the creation step are stored, so they cannot be changed by changing the original crab.cfg . On the other hand the task is protected from any accidental change. If you want to change any parameters, this require the creation of a new task.
Mandatory parameters are flagged with a *.

B<[CRAB]>

=over 4

=item B<jobtype *>

The type of the job to be executed: I<cmssw> jobtypes are supported

=item B<scheduler *>

The scheduler to be used: I<glitecoll> is the more efficient grid scheduler and should be used. Other choice are I<glite>, same as I<glitecoll> but without bulk submission (and so slower) or I<condor_g> (see specific paragraph) or I<edg> which is the former Grid scheduler, which will be dismissed in some future
From version 210, also local scheduler are supported, for the time being only at CERN. I<LSF> is the standard CERN local scheduler or I<CAF> which is LSF dedicated to CERN Analysis Facilities.

=item B<server_name>

To use the CRAB-server support it is needed to fill this key with server name as <Server_DOMAIN> (e.g. cnaf,fnal). If I<server_name=None> crab works in standalone way.
The server available to users can be found from CRAB web page.

=back

B<[CMSSW]>

=over 4

=item B<datasetpath *>

the path of processed dataset as defined on the DBS. It comes with the format I</PrimaryDataset/DataTier/Process> . In case no input is needed I<None> must be specified.

=item B<runselection *>

within a dataset you can restrict to run on a specific run number or run number range. For example runselection=XYZ or runselection=XYZ1-XYZ2 .

=item B<pset *>

the ParameterSet to be used

=item I<Of the following three parameter exactly two must be used, otherwise CRAB will complain.>

=item B<total_number_of_events *>

the number of events to be processed. To access all available events, use I<-1>. Of course, the latter option is not viable in case of no input. In this case, the total number of events will be used to split the task in jobs, together with I<event_per_job>.

=item B<events_per_job*>

number of events to be accessed by each job. Since a job cannot cross the boundary of a fileblock it might be that the actual number of events per job is not exactly what you asked for. It can be used also with No input.

=item B<number_of_jobs *>

Define the number of job to be run for the task. The number of event for each job is computed taking into account the total number of events required as well as the granularity of EventCollections. Can be used also with No input.

=item B<output_file *>

the output files produced by your application (comma separated list).

=item B<increment_seeds>

Specifies a comma separated list of seeds to increment from job to job. The initial value is taken
from the CMSSW config file. I<increment_seeds=sourceSeed,g4SimHits> will set sourceSeed=11,12,13 and g4SimHits=21,22,23 on
subsequent jobs if the values of the two seeds are 10 and 20 in the CMSSW config file.

See also I<preserve_seeds>. Seeds not listed in I<increment_seeds> or I<preserve_seeds> are randomly set for each job.

=item B<preserve_seeds>

Specifies a comma separated list of seeds to which CRAB will not change from their values in the user's
CMSSW config file. I<preserve_seeds=sourceSeed,g4SimHits> will leave the Pythia and GEANT seeds the same for every job.

See also I<increment_seeds>. Seeds not listed in I<increment_seeds> or I<preserve_seeds> are randomly set for each job.

=item B<pythia_seed>

This parameter is deprecated. See the documentation for I<increment_seeds> and I<preserve_seeds>.

=item B<vtx_seed>

This parameter is deprecated. See the documentation for I<increment_seeds> and I<preserve_seeds>.

=item B<g4_seed>

This parameter is deprecated. See the documentation for I<increment_seeds> and I<preserve_seeds>.

=item B<mix_seed>

This parameter is deprecated. See the documentation for I<increment_seeds> and I<preserve_seeds>.

=item B<first_run>

First run to be generated in a generation jobs. Relevant only for no-input workflow.

=item B<executable>

The name of the executable to be run on remote WN. The default is cmsrun. The executable is either to be found on the release area of the WN, or has been built on user working area on the UI and is (automatically) shipped to WN. If you want to run a script (which might internally call I<cmsrun>, use B<USER.script_exe> instead.

=item I<DBS and DLS parameters:>

=item B<dbs_url>

The URL of the DBS query page. For expert only.

=back

B<[USER]>

=over 4

=item B<additional_input_files>

Any additional input file you want to ship to WN: comma separated list. These are the files which might be needed by your executable: they will be placed in the WN working dir. You don\'t need to specify the I<ParameterSet> you are using, which will be included automatically. Wildcards are allowed.

=item B<script_exe>

A user script that will be run on WN (instead of default cmsrun). It\'s up to the user to setup properly the script itself to run on WN enviroment. CRAB guarantees that the CMSSW environment is setup (e.g. scram is in the path) and that the modified pset.cfg will be placed in the working directory, with name CMSSW.cfg . The user must ensure that a job report named crab_fjr.xml will be written. This can be guaranteed by passing the arguments "-j crab_fjr.xml" to cmsRun in the script. The script itself will be added automatically to the input sandbox.

=item B<ui_working_dir>

Name of the working directory for the current task. By default, a name I<crab_0_(date)_(time)> will be used. If this card is set, any CRAB command which require I<-continue> need to specify also the name of the working directory. A special syntax is also possible, to reuse the name of the dataset provided before: I<ui_working_dir : %(dataset)s> . In this case, if e.g. the dataset is SingleMuon, the ui_working_dir will be set to SingleMuon as well.

=item B<thresholdLevel>

This has to be a value between 0 and 100, that indicates the percentage of task completeness (jobs in a ended state are complete, even if failed). The server will notify the user by e-mail (look at the field: B<eMail>) when the task will reach the specified threshold. Works just with the server_mode = 1.

=item B<eMail>

The server will notify the specified e-mail when the task will reaches the specified B<thresholdLevel>. A notification is also sent when the task will reach the 100\\% of completeness. This field can also be a list of e-mail: "B<eMail = user1@cern.ch, user2@cern.ch>". Works just with the server_mode = 1.

=item B<return_data *>

The output produced by the executable on WN is returned (via output sandbox) to the UI, by issuing the I<-getoutput> command. B<Warning>: this option should be used only for I<small> output, say less than 10MB, since the sandbox cannot accommodate big files. Depending on Resource Broker used, a size limit on output sandbox can be applied: bigger files will be truncated. To be used in alternative to I<copy_data>.

=item B<outputdir>

To be used together with I<return_data>. Directory on user interface where to store the output. Full path is mandatory, "~/" is not allowed: the default location of returned output is ui_working_dir/res .

=item B<logdir>

To be used together with I<return_data>. Directory on user interface where to store the standard output and error. Full path is mandatory, "~/" is not allowed: the default location of returned output is ui_working_dir/res .

=item B<copy_data *>

The output (only that produced by the executable, not the std-out and err) is copied to a Storage Element of your choice (see below). To be used as an alternative to I<return_data> and recommended in case of large output.

=item B<storage_element>

To be used together with I<copy_data>. Storage Element name.

=item B<storage_path>

To be used together with I<copy_data>. Path where to put output files on Storage Element. Full path is needed, and the directory must be writeable by all.

=item B<copyCommand>

Only for LSF scheduler: allow to define the command to be used to copy the output to final location. Default is rfcp

=item B<xml_report>

To be used to switch off the screen report during the status query, enabling the db serialization in a file. Specifying I<xml_report> = FileName CRAB will serialize the DB into CRAB_WORKING_DIR/share/FileName.

=back

B<[EDG]>

=over 4

=item B<RB>

Which RB you want to use instead of the default one, as defined in the configuration of your UI. The ones available for CMS are I<CERN> and I<CNAF>. They are actually identical, being a collection of all RB/WMS available for CMS: the configuration files needed to change the broker will be automatically downloaded from CRAB web page and used.
You can use any other RB which is available, if you provide the proper configuration files. E.g., for RB XYZ, you should provide I<edg_wl_ui.conf.CMS_XYZ> and I<edg_wl_ui_cmd_var.conf.CMS_XYZ> for EDG RB, or I<glite.conf.CMS_XYZ> for glite WMS. These files are searched for in the current working directory, and, if not found, on crab web page. So, if you put your private configuration files in the working directory, they will be used, even if they are not available on crab web page.
Please get in contact with crab team if you wish to provide your RB or WMS as a service to the CMS community.

=item B<proxy_server>

The proxy server to which you delegate the responsibility to renew your proxy once expired. The default is I<myproxy.cern.ch> : change only if you B<really> know what you are doing.

=item B<role>

The role to be set in the VOMS. See VOMS documentation for more info.

=item B<group>

The group to be set in the VOMS, See VOMS documentation for more info.

=item B<dont_check_proxy>

If you do not want CRAB to check your proxy. The creation of the proxy (with proper length), its delegation to a myproxyserver is your responsibility.

=item B<requirements>

Any other requirements to be add to JDL. Must be written in compliance with JDL syntax (see LCG user manual for further info). No requirement on Computing element must be set.

=item B<additional_jdl_parameters:>

Any other parameters you want to add to jdl file:semicolon separated list, each
item B<must> be complete, including the closing ";".

=item B<wms_service>

With this field it\'s also possible to specify which WMS you want to use (https://hostname:port/pathcode) where "hostname" is WMS\' name, the "port" generally is 7443 and the "pathcode" should be something like "glite_wms_wmproxy_server".

=item B<max_cpu_time>

Maximum CPU time needed to finish one job. It will be used to select a suitable queue on the CE. Time in minutes.

=item B<max_wall_clock_time>

Same as previous, but with real time, and not CPU one.

=item B<CE_black_list>

All the CE (Computing Element) whose name contains the following strings (comma separated list) will not be considered for submission. Use the dns domain (e.g. fnal, cern, ifae, fzk, cnaf, lnl,....)

=item B<CE_white_list>

Only the CE (Computing Element) whose name contains the following strings (comma separated list) will be considered for submission. Use the dns domain (e.g. fnal, cern, ifae, fzk, cnaf, lnl,....). Please note that if the selected CE(s) does not contain the data you want to access, no submission can take place.

=item B<SE_black_list>

All the SE (Storage Element) whose name contains the following strings (comma separated list) will not be considered for submission.It works only if a datasetpath is specified.

=item B<SE_white_list>

Only the SE (Storage Element) whose name contains the following strings (comma separated list) will be considered for submission.It works only if a datasetpath is specified. Please note that if the selected CE(s) does not contain the data you want to access, no submission can take place.

=item B<virtual_organization>

You don\'t want to change this: it\'s cms!

=item B<retry_count>

Number of time the Grid will try to resubmit your job in case of Grid related problem.

=item B<shallow_retry_count>

Number of time shallow resubmission the Grid will try: resubmissions are tried B<only> if the job aborted B<before> start. So you are guaranteed that your jobs run strictly once.

=item B<maxtarballsize>

Maximum size of tar-ball in Mb. If bigger, an error will be generated. The actual limit is that on the RB input sandbox. Default is 9.5 Mb (sandbox limit is 10 Mb)

=back

B<[LSF]>

=over 4

=item B<queue>

The LSF queue you want to use: if none, the default one will be used. For CAF, the proper queue will be automatically selected.

=item B<resource>

The resources to be used within a LSF queue. Again, for CAF, the right one is selected.


=back

=head1 FILES

I<crab> uses a configuration file I<crab.cfg> which contains configuration parameters. This file is written in the INI-style. The default filename can be changed by the I<-cfg> option.

I<crab> creates by default a working directory 'crab_0_E<lt>dateE<gt>_E<lt>timeE<gt>'

I<crab> saves all command lines in the file I<crab.history>.

=head1 HISTORY

B<CRAB> is a tool for the CMS analysis on the Grid environment. It is based on the ideas from CMSprod, a production tool originally implemented by Nikolai Smirnov.

=head1 AUTHORS

"""
    author_string = '\n'
    for auth in common.prog_authors:
        author_string = author_string + auth[0] + ' E<lt>' + auth[1] + 'E<gt>,\n'
    # Drop the trailing ",\n" of the last author and close the list with a
    # full stop before terminating the POD document.
    return help_string + author_string[:-2] + '.' + """

=cut
"""


def _help_temp_file(suffix=''):
    """Create an empty temporary file and return its path.

    ``tempfile.mkstemp`` creates the file atomically, which avoids the race
    inherent in ``tempfile.mktemp`` (a name is returned there before any
    file exists).
    """
    handle, path = tempfile.mkstemp(suffix=suffix)
    os.close(handle)
    return path


def _help_remove(path):
    """Delete the temporary file *path*; a missing file is not an error."""
    try:
        os.remove(path)
    except OSError:
        pass


def _help_run(command):
    """Run *command* through the shell; return True if its status is 0."""
    return os.system(command) == 0


def _help_show_man(pod):
    """Convert the POD file *pod* with pod2man and display it with man."""
    man = _help_temp_file()
    try:
        converted = _help_run('pod2man --center=" " --release=" " ' + pod + ' >' + man)
        if converted:
            os.system('man ' + man)
        return converted
    finally:
        _help_remove(man)


def _help_write_tex(pod):
    """Convert *pod* with pod2latex into '<prog_name>-v<version>.tex'.

    The raw pod2latex output is post-processed: a title/author/date block
    is inserted before ``\\begin{document}``, the ``\\tableofcontents``
    line is replaced by ``\\maketitle`` and ``\\clearpage`` lines are
    dropped.
    """
    tex = common.prog_name + '-v' + common.prog_version_str + '.tex'
    tex0 = _help_temp_file('.tex')
    try:
        if not _help_run('pod2latex -full -out ' + tex0 + ' ' + pod):
            return False
        tex_old = open(tex0, 'r')
        try:
            tex_new = open(tex, 'w')
            try:
                for s in tex_old:
                    if '\\begin{document}' in s:
                        # The preamble additions go in front of the line,
                        # which is itself still copied below.
                        tex_new.write('\\title{' + common.prog_name + '\\\\' +
                                      '(Version ' + common.prog_version_str + ')}\n')
                        tex_new.write('\\author{\n')
                        for auth in common.prog_authors:
                            tex_new.write(' ' + auth[0] +
                                          '\\thanks{' + auth[1] + '} \\\\\n')
                        tex_new.write('}\n')
                        tex_new.write('\\date{}\n')
                    elif '\\tableofcontents' in s:
                        tex_new.write('\\maketitle\n')
                        continue
                    elif '\\clearpage' in s:
                        continue
                    tex_new.write(s)
            finally:
                tex_new.close()
        finally:
            tex_old.close()
    finally:
        _help_remove(tex0)
    print('See ' + tex)
    return True


def _help_write_html(pod):
    """Convert *pod* with pod2html into '<prog_name>-v<version>.html'."""
    fname = common.prog_name + '-v' + common.prog_version_str + '.html'
    converted = _help_run('pod2html --title=' + common.prog_name +
                          ' --infile=' + pod + ' --outfile=' + fname)
    if converted:
        print('See ' + fname)
    return converted


def _help_write_txt(pod):
    """Convert *pod* with pod2text into '<prog_name>-v<version>.txt'."""
    fname = common.prog_name + '-v' + common.prog_version_str + '.txt'
    converted = _help_run('pod2text ' + pod + ' ' + fname)
    if converted:
        print('See ' + fname)
    return converted


def help(option='man'):
    """Produce the detailed CRAB manual in the requested format and exit.

    The manual is written as POD to a temporary file and converted with
    the matching external tool:

    - 'man'  -- pod2man, then shown with ``man`` (default)
    - 'tex'  -- pod2latex, written to '<prog_name>-v<version>.tex'
    - 'html' -- pod2html, written to '<prog_name>-v<version>.html'
    - 'txt'  -- pod2text, written to '<prog_name>-v<version>.txt'

    An empty or None *option* is treated as 'man'. The function never
    returns: it ends the process with ``sys.exit(0)`` on success and
    ``sys.exit(1)`` when the format is unknown or the conversion command
    reports a failure. Temporary files are removed before exiting.
    """
    converters = {
        'man': _help_show_man,
        'tex': _help_write_tex,
        'html': _help_write_html,
        'txt': _help_write_txt,
    }
    if not option:
        option = 'man'
    if option not in converters:
        # Previously an unknown format produced no output at all.
        sys.stderr.write('Unknown help format "' + str(option) +
                         '": use one of man, tex, html, txt\n')
        sys.exit(1)

    pod = _help_temp_file('.pod')
    try:
        pod_file = open(pod, 'w')
        try:
            pod_file.write(_help_pod_source())
        finally:
            pod_file.close()
        converted = converters[option](pod)
    finally:
        _help_remove(pod)

    if not converted:
        sys.stderr.write('Could not produce the help in "' + option +
                         '" format: the conversion command failed\n')
        sys.exit(1)
    sys.exit(0)
|