ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/crab_help.py
Revision: 1.1
Committed: Thu Mar 10 16:20:30 2005 UTC (20 years, 1 month ago) by nsmirnov
Content type: text/x-python
Branch: MAIN
Log Message:
initial set of files

File Contents

# User Rev Content
1 nsmirnov 1.1
2     ###########################################################################
3     #
4     # H E L P F U N C T I O N S
5     #
6     ###########################################################################
7    
8     import common
9    
10     import sys, os, string
11     import tempfile
12    
13     ###########################################################################
def usage():
    """Print a one-screen summary of the most common crab options and exit(2).

    The full manual (man/tex/html) is produced by help(); this is only the
    short reminder shown for bad or missing command lines.
    """
    summary = common.prog_name + """ [options]
The most useful general options (use '-h' to get complete help):
-create n -- Create only n jobs. Default is 'all'. bunch_creation will become obsolete
-submit n -- Submit only n jobs. Default is 0. bunch_submission will become obsolete
-mon | -monitor | -autoretrieve [value in secs] -- Retrieve the output at the end of the job, plus simple monitoring. No value means default to 60 seconds. autoretrieve will become obsolete
-continue [dir] | -c [dir] -- Continue creation and submission of jobs from <dir>.
-h [format] -- Detailed help. Formats: man (default), tex, html.
-cfg fname -- Configuration file name. Default is 'crab.cfg'.
-use_boss flag -- If flag = 1 then BOSS will be used. Default is 0.
-use_jam flag -- If flag = 1 the JAM monitoring is used. Default is 0
-v -- Print version and exit.

Example:
crab.py -create 1 -submit 1
"""
    # Equivalent to the Python 2 statement `print summary` (text + newline).
    sys.stdout.write(summary + '\n')
    # Conventional "bad usage" exit status.
    sys.exit(2)
32    
33     ###########################################################################
def help(option='man'):
    """Render the CRAB manual and exit(0).

    The manual text is kept inline as Perl POD markup; it is written to a
    temporary .pod file and converted with the external tools pod2man,
    pod2latex or pod2html depending on `option`:

      option == 'man'  -- convert to a man page and display it via `man`
      option == 'tex'  -- convert to LaTeX, post-process the preamble, and
                          save it as <prog_name>-v<version>.tex
      option == 'html' -- convert to <prog_name>-v<version>.html

    Always terminates the process with sys.exit(0).
    """
    # POD source of the manual.  This is runtime data handed to the pod2*
    # converters, so its exact content (including spelling) is significant.
    help_string = """
=pod

=head1 NAME

B<CRAB>: B<C>ms B<R>emote B<A>nalysis B<B>uilder

"""+common.prog_name+""" version: """+common.prog_version_str+""" to use with PubDB_V3_1

This tool _must_ be used from an User Interface and the user is supposed to
have a valid GRID certificate and an active proxy.


=head1 SYNOPSIS

B<"""+common.prog_name+""".py> [I<options>]


=head1 DESCRIPTION

CRAB is a Python program intended to simplify the process of creation and submission into GRID environment of CMS analysis jobs.

Parameters and card-files for analysis are to by provided by user changing the configuration file crab.cfg.

CRAB generates scripts and additional data files for each job. The produced scripts are submitted directly to the Grid.


=head1 BEFORE STARTING

A) Develop your code in your ORCA working area, build with the usual `scram project ORCA ORCA_X_Y_Z`
From user scram area (which can be anywhere eg /home/fanzago/ORCA_8_2_0/), issue the usual command
> eval `scram runtime -sh|csh`

B) Move to your CRAB working area (that is UserTools/src) and modify the configuration file "crab.cfg" (into UserTools/src).
The most important section is called [USER] where the user declares:

Mandatory!
*) dataset and owner to analyze
*) the ORCA executable name (e.g. EXDigiStatistics).
CRAB finds the executable into the user scram area (e.g. /home/fanzago/ORCA_8_2_0/bin/Linux__2.4/here!).
*) the name of output produced by ORCA executable. Empty entry means no output produced
*) the total number of events to analyze, the number of events for each job and the number of the first event to analyze.
*) the orcarc card to use. This card will be modified by crab according with the job splitting.
Use the very same cars you used in your interactive test: CRAB will modify what is needed.

Might be useful:
*) additional_input_files (from 005)
Comma separated list of files to be submitted via input sandbox.
The files will be put in the working directory on WN. It's user responsibility to actually use them!
*) data_tiers (new from ver 004)
The possible choices are "DST,Digi,Hit" (comma separated list, mind the case!)
If set, the job will be able to access not only the data tier corresponding to the dataset/owner asked, but also
to its "parents". This requires that the parents are actually published in the same site of the primary dataset/owner.
If not set, only the primary data tier will be accessible
*) output_dir e log_dir, path of directory where crab will put the std_error and std_output of job.
If these parameters are commented, error and output will be put into the directory where sh and jdl script are (crab_0_date_time).
These parameter will be use only with the automatic retrieve of output option (-autoretrieve)

Optional:
*) how to pack the ORCA code provided by the user (up to now is possible only as tgz)
*) the name of tgz archive ( called by default "default.tgz")
*) run_jam e output_jam are parameter for JAM monitoring (used with option use_jam)
*) the name of UI directory where crab will create jobs. By default the name is "crab_data_time"

C) Before submitting jobs, user needs to create a proxy with the command:
grid-proxy-init

At CERN, you can use "lxplus" as a UI by sourcing the file
source /afs/cern.ch/cms/LCG/LCG-2/UI/cms_ui_env.csh

WARNING:
Since the LCG working nodes actually installed on different site still use RedHat7.3, you can only submit jobs from a UI based on RH7.3.
At CERN, this is possible using "lxplus7".

=head1 HOW TO RUN CRAB FOR THE IMPATIENT USER

Please, read all anyway!

> ./crab.py -create 2
create 2 jobs (no submission!)

> ./crab.py -bunch_create 0 -submit 2 -continue [ui_working_dir]
create 0, submit 2, the ones already created (-continue)

> ./crab.py -create 2 -submit 2
create _and_ submit 2 jobs

> ./crab.py -create 2 -submit 2 -autoretrieve
create, submit 2 jobs and retrieve the output at the end, plus simple monitoring


=head1 HOW TO RUN CRAB

The executable file is crab.py

I<If you want only create jobs (NO submission):>
> ./crab.py -create 2

Crab creates a directory called crab_0_"data"_"time" where you can find 4 subdirectories
job: contains sh, jdl and card
share: contains the "file_to_send", that provides the informations retrieved by local_pubdb
log: there are the log of crab and the grid ID of submitted jobs
res empty...

The option "register_data" allows to copy and register the ORCA output (e.g.the .root file) into a
Storage element and RLS catalog ( .root file). To do it, put register_data 1 (see information about this option)

I<If you to submit the previously created jobs:>
> ./crab.py -create 0 -submit 2 -continue [ui_working_dir]
(the submission is done using edg-job-submit command).

To see the status of job, the user needs to run
> edg-job-status -i crab_data_time/log/submission_id.log

To retrieve the output
> edg-job-get-output -i crab_data_time/log/submission_id.log

If you want to use the automatic retrieve of output, add the option "-autoretrieve"
> ./crab.py -create 0 -submit 2 -autoretrieve -continue [ui_working_dir]

In this case the monitoring (status) and the get-output will be done I<automatically>.
You can find some information about the status into the log of crab (directory crab_0_data_time/log).

The job monitoring and output retrieval runs asynchronously, that is you can submit your jobs and only afterwards
start the retrieval of output.
> ./crab.py -create 0 -submit 2 -continue [ui_working_dir]
> ./crab.py -create 0 -submit 0 -autoretrieve -continue [ui_working_dir]

If you want to use the JAM monitornig, add the option -use_jam (0 by default)
> ./crab.py -create 1 -submit 1 -use_jam 1 -autoretrieve


=head1 SOME OTHER INFO:

You can find a useful file into directory "ui_working_dir"/share/script.list.
Here are written the name of job (with jobsplitting) that are to be created and submitted.
Near the name a letter that means:

X = job to create
C = job created but not submitted
S = job submitted
M = job being monitored


=head1 KNOWN PROBLEMS:

1) It is possible to read a warning messagge when crab start to run, depending on ORCA version (e.g. ORCA_8_6_0):
.../src/scram.py:13: DeprecationWarning: Non-ASCII character '\xa7' in file /opt/edg/bin/UIutils.py on line 225, but no encoding declared; see http://www.python.org/peps/pep-0263.html for details
import UIutils
/opt/edg/lib/python/edg_wl_userinterface_common_NsWrapper.py:4: RuntimeWarning: Python C API version mismatch for module _edg_wl_userinterface_common_NsWrapper: This Python has API version 1012, module _edg_wl_userinterface_common_NsWrapper has version 1011.
import _edg_wl_userinterface_common_NsWrapper
/opt/edg/lib/python/edg_wl_userinterface_common_LbWrapper.py:4: RuntimeWarning: Python C API version mismatch for module _edg_wl_userinterface_common_LbWrapper:
This Python has API version 1012,
module _edg_wl_userinterface_common_LbWrapper has version 1011.
import _edg_wl_userinterface_common_LbWrapper
/opt/edg/lib/python/edg_wl_userinterface_common_AdWrapper.py:4: RuntimeWarning: Python C API version mismatch for module _edg_wl_userinterface_common_AdWrapper: This Python has API version 1012,

It seems to depend on a mismatch between the version of python used by
ORCA_8_6_0 and the version used to "compile" /opt/edg/etc/bin/UIutils

Not critical !


2) If you are using the option -autoretrieve, when the submission step
finishes, the shell prompt doesn't retun. Just press enter!

3) If you use -monitor and then exit the shell, the autoretrieve thread are killed...


=head1 WORK IN PROGRESS:

Implementing BOSS monitoring.
Changing monitor function.
Final merging


=head1 OPTIONS

=over 4

=item B<-bunch_creation n | -create n>

Create n jobs maximum. 'n' is either positive integer or 'all'.
Default is 'all'.
See also I<-continue>.

=item B<-bunch_size n>

The same as '-bunch_creation n' and '-bunch_submission n'.

=item B<-bunch_submission n | -submit n>

Submit n jobs maximum. 'n' is either positive integer or 'all'.
Default is 0.
See also I<-continue>.

=item B<-mon | -monitor | -autoretrieve>

With this option the monitoring (status) and the get-output of jobs will be done I<automatically>.
You can find some information about the status into the log of crab (directory crab_0_data_time/log).

=item B<-continue [dir]>

Continue submission of batch jobs from 'dir'. 'dir' is a top level directory
created when scripts were generated.
By default the name of the dir is I<crab_0_date_time>.
If the name of dir is different (selected by the user, changing in crab.cfg file the ui_working_dir parameter),
it is necessary to specify it in -continue "ui_working_dir"

Examples:
1) Into the cfg file the line "ui_working_dir" is commented:
the command
> ./crab.py -create 1 -submit 1 -register_data 0
creates and submit 1 job. The name of directory where the job is creates, is ".../UserTools/src/crab_data_time"

If you want to create and submitt an other jobs:
> ./crab.py -create 1 -submit 1 -register_data 0 -continue
the job will be created into the same directory ".../UserTools/src/crab_data_time"

2) Into the cfg file the line "ui_working_dir" is uncommented:
the command
> ./crab.py -create 1 -submit 1 -register_data 0
creates and submit 1 job. The directory where the job is creates, is ".../UserTools/src/'ui_working_dir'"

If you want to create and submitt 1 other jobs:
> ./crab.py -create 1 -submit 1 -register_data 0 -continue 'ui_working_dir'
In this case you need to specified the name of directory


Another way to modify the value of parameter into the cfg file, without change the cfg file, is to write like option the parameter that you want to change.

Example:
> ./crab.py -create 1 -submit 1 -register_data 0 -USER.ui_working_dir name_that_you_want
and to continue
> ./crab.py -create 1 -submit 1 -register_data 0 -continue name_that_you_want

=item B<-h [format]>

Detailed help. Formats: man (default), tex, html.

=item B<-ini fname>

Configuration file name. Default is 'crab.cfg'.
I<'none'> is a special value used to ignore the default file.

=item B<-Q>

Quiet mode, i.e. no output on the screen.

=item B<-register_data flag>

register_data 1 allows to copy and register the output of ORCA executable into
the Storage Element "close" to the Worker node where the job is running, or, if
the close has problem, into a storage element provided by the user into the
configuration file.

Into crab.cfg:
[EDG]
...
storage_element = gridit002.pd.infn.it <--- name of "backup storage element" (to use if the CloseSE isn\'t available)
storage_path = /flatfiles/SE00/cms/ <--- directory into the SE where a cms user can write
...

[USER]
output_storage_subdir = fede/orca/25_11_2004/ <--- subdirectory of cms area where the output will be stored
Example: we can found the output stored in
1) closeSE/mountpoint_cms/[USER].output_storage_subdir/[USER].output_file
or (if close has problem)
2) [EDG].storage_element/[EDG].storage_path/[USER].output_storage_subdir/[USER].output_file

into RLS the lfn = [USER].output_storage_subdir/[USER].output_file will be registered

The value of "register_data" parameter can be written into the cfg file into the section
[CRAB]
...
register_data = 0 or
register_data = 1
in order to avoid to write it like command line option.
Default is 0

=item B<-return_data flag>

If flag = 0 then produced data will not be returned to user.
Default is 0 for 'edg' and always 1 for local schedulers.

=item B<-use_boss flag>

If flag = 1 then the BOSS metascheduler will be used.
Default is 0, i.e. BOSS is not used.

=item B<-usecloseCE>

If flag = 1 then in jdl and classad files are written InputData that
contains LFN of input_data, ReplicaCatalog that contains RLS URL (for
example rls://datatag2.cnaf.infn.it) and DataAccessProtocol that contains
protocol used to data access (for example gsiftp).
In this case the Resource Broker selects a CE closest to SE where input_data
are stored, in order to run jobs.

=item B<-V>

Verbose, i.e. produce more output.

=item B<-v>

Print version.

=item B<->I<any_key value>

Any unrecognized option is treated as a configuration parameter with
specified value. Can be used for the command-line redefinition
of configuration parameters from an ini-file. For example, a user wants
to submit jobs into EDG but he does not like the default User
Interface configuration file in which a location of a Resource Broker is
specified. One possibility is to edit the ini-file
changing the value of the 'rb_config' parameter in the 'EDG' section.
The second possibility is to provide this value as a command-line
option: I<-EDG.rb_config my_ui_config>.

Can be used also for specification of private production parameters, e.g.
I<-Private.executablename myjob> (Note all lowercase letters in the second part
of the option, i.e. after the dot).

=back


=head1 FILES

I<crab> uses initialization file I<crab.cfg> which contains
configuration parameters. This file is written in the Windows INI-style.
The default filename can be changed by the I<-cfg> option.

I<crab> creates by default a working directory
'crab_0_E<lt>dateE<gt>_E<lt>timeE<gt>'

I<crab> saves all command lines in the file I<crab.history>.


=head1 HISTORY

B<crab> is a tool for the CMS analysis on the grid environment.
It is based on the ideas from CMSprod, a production tools
implemented by Nikolai Smirnov.

=head1 AUTHORS

"""
    # Build the AUTHORS paragraph from common.prog_authors; each entry is
    # expected to be an indexable pair-like value (name at [0], address at [1])
    # formatted as "Name E<lt>address E<gt>,\n" per author.
    author_string = '\n'
    for auth in common.prog_authors:
        #author = auth[0] + ' (' + auth[2] + ')' + ' E<lt>'+auth[1]+'E<gt>,\n'
        author = auth[0] + ' E<lt>' + auth[1] +'E<gt>,\n'
        author_string = author_string + author
        pass
    # [:-2] drops the trailing ",\n" of the last author before the final '.';
    # the backslash-continued adjacent string literal appends the POD "=cut".
    help_string = help_string + author_string[:-2] + '.'\
"""

=cut
"""

    # NOTE(review): tempfile.mktemp() only *names* a file and is race-prone
    # (another process could create the same path first) -- kept as-is here.
    pod = tempfile.mktemp()+'.pod'
    pod_file = open(pod, 'w')
    pod_file.write(help_string)
    pod_file.close()

    if option == 'man':
        # Convert the POD file to a man page and display it with `man`.
        man = tempfile.mktemp()
        pod2man = 'pod2man --center=" " --release=" " '+pod+' >'+man
        os.system(pod2man)
        os.system('man '+man)
        pass
    elif option == 'tex':
        # Convert to LaTeX, then rewrite the generated preamble: inject a
        # \title/\author/\date block at \begin{document}, replace
        # \tableofcontents with \maketitle, and drop \clearpage lines.
        fname = common.prog_name+'-v'+common.prog_version_str
        tex0 = tempfile.mktemp()+'.tex'
        pod2tex = 'pod2latex -full -out '+tex0+' '+pod
        os.system(pod2tex)
        tex = fname+'.tex'
        tex_old = open(tex0, 'r')
        tex_new = open(tex, 'w')
        for s in tex_old.readlines():
            if string.find(s, '\\begin{document}') >= 0:
                tex_new.write('\\title{'+common.prog_name+'\\\\'+
                              '(Version '+common.prog_version_str+')}\n')
                tex_new.write('\\author{\n')
                for auth in common.prog_authors:
                    tex_new.write('  '+auth[0]+
                                  '\\thanks{'+auth[1]+'} \\\\\n')
                tex_new.write('}\n')
                tex_new.write('\\date{}\n')
                # falls through: the \begin{document} line itself is kept below
            elif string.find(s, '\\tableofcontents') >= 0:
                tex_new.write('\\maketitle\n')
                continue
            elif string.find(s, '\\clearpage') >= 0:
                continue
            tex_new.write(s)
        tex_old.close()
        tex_new.close()
        print 'See '+tex
        pass
    elif option == 'html':
        # Convert straight to a single HTML file in the current directory.
        fname = common.prog_name+'-v'+common.prog_version_str+'.html'
        pod2html = 'pod2html --title='+common.prog_name+\
                   ' --infile='+pod+' --outfile='+fname
        os.system(pod2html)
        print 'See '+fname
        pass
    # Any unrecognized option silently produces no output before exiting.

    sys.exit(0)