ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_orca_pubdb.py
Revision: 1.3
Committed: Fri Jun 23 15:10:39 2006 UTC (18 years, 10 months ago) by slacapra
Content type: text/x-python
Branch: MAIN
CVS Tags: CRAB_1_2_0_pre7
Changes since 1.2: +4 -4 lines
Log Message:
job splitting parameters are inside each jobtype section of crab.cfg

File Contents

# User Rev Content
1 slacapra 1.1 from JobType import JobType
2     from crab_logger import Logger
3     from crab_exceptions import *
4     from crab_util import *
5     import common
6     import PubDB
7     import orcarcBuilder
8     import orcarcBuilderOld
9     import Scram
10     import TarBall
11    
12     import os, string, re
13     import math
14    
15     class Orca_pubdb(JobType):
def __init__(self, cfg_params):
    """
    Configure the ORCA/PubDB job type from the crab.cfg parameters.

    cfg_params is indexed with 'SECTION.key' strings; a missing key is
    expected to raise KeyError.  The constructor reads the ORCA.* and
    USER.* keys used below, records some of them through setParam_,
    calls connectPubDB() and asks TarBall to prepare the tarball of the
    user code.

    Raises CrabException when owner/dataset, executable or
    total_number_of_events are not defined, or when a user supplied
    orcarc or script_exe file does not exist.
    """
    JobType.__init__(self, 'ORCA_PUBDB')
    common.logger.debug(3,'ORCA_pubdb::__init__')

    self.analisys_common_info = {}
    # Marco.
    self._params = {}
    self.cfg_params = cfg_params

    log = common.logger

    self.scram = Scram.Scram(cfg_params)
    self.additional_inbox_files = []
    self.scriptExe = ''

    self.version = self.scram.getSWVersion()
    self.setParam_('application', self.version)
    common.analisys_common_info['sw_version'] = self.version
    common.analisys_common_info['copy_input_data'] = 0

    # filled in later by numberOfJobs()
    self.total_number_of_jobs = 0
    self.job_number_of_events = 0

    ### collect Data cards
    try:
        self.owner = cfg_params['ORCA.owner']
        self.setParam_('owner', self.owner)
        log.debug(6, "Orca::Orca(): owner = "+self.owner)
        self.dataset = cfg_params['ORCA.dataset']
        self.setParam_('dataset', self.dataset)
        log.debug(6, "Orca::Orca(): dataset = "+self.dataset)
    except KeyError:
        msg = "Error: owner and/or dataset not defined "
        raise CrabException(msg)

    # data tiers are optional: a comma separated list
    self.dataTiers = []
    try:
        for tier in cfg_params['ORCA.data_tier'].split(','):
            self.dataTiers.append(tier.strip())
    except KeyError:
        pass
    log.debug(6, "Orca_pubdb::Orca_pubdb(): dataTiers = "+str(self.dataTiers))

    ## now the application
    try:
        self.executable = cfg_params['ORCA.executable']
        log.debug(6, "Orca_pubdb::Orca_pubdb(): executable = "+self.executable)
        self.setParam_('exe', self.executable)
    except KeyError:
        msg = "Error: executable not defined "
        raise CrabException(msg)

    try:
        self.orcarc_file = cfg_params['ORCA.orcarc_file']
        log.debug(6, "Orca_pubdb::Orca_pubdb(): orcarc file = "+self.orcarc_file)
        if (not os.path.exists(self.orcarc_file)):
            raise CrabException("User defined .orcarc file "+self.orcarc_file+" does not exist")
    except KeyError:
        log.message("Using empty orcarc file")
        self.orcarc_file = ''

    # output files: optional comma separated list
    self.output_file = []
    try:
        outputs = cfg_params['ORCA.output_file']
        if outputs != '':
            tmpOutFiles = outputs.split(',')
            log.debug(7, 'Orca_pubdb::Orca_pubdb(): output files '+str(tmpOutFiles))
            for name in tmpOutFiles:
                self.output_file.append(name.strip())
        else:
            log.message("No output file defined: only stdout/err will be available")
    except KeyError:
        log.message("No output file defined: only stdout/err will be available")

    # script_exe file as additional file in inputSandbox
    try:
        self.scriptExe = cfg_params['ORCA.script_exe']
        self.additional_inbox_files.append(self.scriptExe)
    except KeyError:
        pass
    if self.scriptExe != '' and not os.path.isfile(self.scriptExe):
        # Reported like every other configuration error in this
        # constructor (previously: a warning followed by sys.exit()).
        raise CrabException("ERROR. file "+self.scriptExe+" not found")

    ## additional input files
    try:
        for name in cfg_params['USER.additional_input_files'].split(','):
            self.additional_inbox_files.append(name.strip())
    except KeyError:
        pass

    try:
        total = cfg_params['ORCA.total_number_of_events']
    except KeyError:
        msg = 'Must define total_number_of_events and job_number_of_events'
        raise CrabException(msg)
    # 'all' is a synonym of -1 (see numberOfJobs); int('all') would fail
    if str(total).strip() == 'all':
        total = -1
    self.total_number_of_events = int(total)

    try:
        self.first_event = int(cfg_params['ORCA.first_event'])
    except KeyError:
        self.first_event = 0
    log.debug(6, "Orca_pubdb::Orca_pubdb(): total number of events = "+repr(self.total_number_of_events))
    #log.debug(6, "Orca_pubdb::Orca_pubdb(): events per job = "+`self.job_number_of_events`)
    log.debug(6, "Orca_pubdb::Orca_pubdb(): first event = "+repr(self.first_event))

    self.maxEvents=0 # max events available in any PubDB
    self.connectPubDB(cfg_params)

    # [-- self.checkNevJobs() --]

    self.TarBaller = TarBall.TarBall(self.executable, self.scram)
    self.tgzNameWithPath = self.TarBaller.prepareTarBall()

    # MonALISA reporting is off unless USER.activate_monalisa is set
    try:
        self.ML = int(cfg_params['USER.activate_monalisa'])
    except KeyError:
        self.ML = 0

    self.setTaskid_()
    self.setParam_('taskId', self.cfg_params['taskId'])

    return
160    
def split(self, jobParams):
    """
    Append one [first event, number of events] entry per job to
    jobParams and store the same pair as arguments in common.jobDB.

    All jobs but the last get self.job_number_of_events events; the
    last one gets whatever is left of self.total_number_of_events.
    The job DB is loaded at the beginning and saved at the end.
    """
    common.jobDB.load()
    njobs = self.total_number_of_jobs
    per_job = self.job_number_of_events
    last = njobs - 1

    # one placeholder per job, overwritten below
    jobParams.extend([""] * njobs)

    # all the jobs but the last one have the same size
    start = self.first_event
    for idx in range(last):
        jobParams[idx] = [start, per_job]
        common.jobDB.setArguments(idx, jobParams[idx])
        start = start + per_job

    # the last job takes the remaining events
    remaining = (self.total_number_of_events + self.first_event) - start
    common.jobDB.status(last)
    jobParams[last] = [start, remaining]
    common.jobDB.setArguments(last, jobParams[last])

    grand_total = per_job * (self.total_number_of_jobs - 1) + remaining
    if remaining != per_job:
        summary = '%s jobs will be created for %s events each plus 1 for %s events for a total of %s events' % (
            self.total_number_of_jobs - 1, per_job, remaining, grand_total)
    else:
        summary = '%s jobs will be created for %s events each for a total of %s events' % (
            self.total_number_of_jobs, per_job, grand_total)
    common.logger.message(summary)

    common.jobDB.save()
    return
194    
def getJobTypeArguments(self, nj, sched):
    """
    Return the argument string of job 'nj' for the scheduler 'sched'.

    The arguments stored in common.jobDB are joined with a blank; for
    CONDOR the first entry of common.analisys_common_info['sites'] is
    appended.  An unknown scheduler name gives an empty string.
    """
    params = common.jobDB.arguments(nj)

    if sched in ("EDG", "GRID", "BOSS"):
        return "" + str(params[0]) + ' ' + str(params[1])
    if sched == "CONDOR":
        first_site = common.analisys_common_info['sites'][0]
        return "" + str(params[0]) + ' ' + str(params[1]) + ' ' + first_site
    return ""
207    
def numberOfJobs(self):
    """
    Work out the job splitting and return the number of jobs.

    Reads self.first_event, self.maxEvents and
    self.total_number_of_events (-1 or 'all' meaning every available
    event) plus the optional ORCA.job_number_of_events and
    ORCA.total_number_of_jobs keys of self.cfg_params.  When both keys
    are given job_number_of_events wins; when neither is given a single
    job is created.

    Side effects: sets self.job_number_of_events and
    self.total_number_of_jobs, and replaces self.total_number_of_events
    with the resolved number of events so that split() and jobsToDB()
    size the last job correctly when "all events" was requested.

    Raises CrabException when the requested range exceeds the available
    events or job_number_of_events is not a valid number.
    """
    first_event = self.first_event
    maxAvailableEvents = int(self.maxEvents)
    common.logger.debug(1,"Available events: "+str(maxAvailableEvents))

    if first_event>=maxAvailableEvents:
        raise CrabException('First event is bigger than maximum number of available events!')

    # The request is normally an int; the string spellings are
    # accepted too in case the attribute was set from raw configuration.
    requested = self.total_number_of_events
    if requested in (-1, '-1', 'all'):
        tot_num_events = (maxAvailableEvents - first_event)
        common.logger.debug(1,"Analysing all available events "+str(tot_num_events))
    else:
        requested = int(requested)
        if maxAvailableEvents<(requested + first_event):
            raise CrabException('(First event + total events)='+str(requested+first_event)+' is bigger than maximum number of available events '+str(maxAvailableEvents)+' !! Use "total_number_of_events=-1" to analyze to whole dataset')
        tot_num_events = requested
    # keep the resolved value: split()/jobsToDB() use it for the last job
    self.total_number_of_events = tot_num_events
    common.logger.message("Total number of events to be analyzed: "+str(tot_num_events))

    # read user directives
    eventPerJob = 0
    allEventsInOneJob = 0
    try:
        rawEventPerJob = str(self.cfg_params['ORCA.job_number_of_events']).strip()
    except KeyError:
        rawEventPerJob = None
    if rawEventPerJob is not None:
        if rawEventPerJob in ('all', '-1'):
            allEventsInOneJob = 1
        else:
            try:
                eventPerJob = int(rawEventPerJob)
            except ValueError:
                raise CrabException('job_number_of_events must be an integer or "all": '+rawEventPerJob)
            if eventPerJob < 0:
                raise CrabException('job_number_of_events must be positive: '+rawEventPerJob)
    # a value of 0 is treated as "not defined"
    eventsDefined = allEventsInOneJob or eventPerJob > 0

    jobsPerTask=0
    try:
        jobsPerTask = int(self.cfg_params['ORCA.total_number_of_jobs'])
    except KeyError:
        pass

    # If both the above set, complain and use event per jobs
    if eventsDefined and jobsPerTask>0:
        msg = 'Warning. '
        msg += 'job_number_of_events and total_number_of_jobs are both defined '
        msg += 'Using job_number_of_events.'
        common.logger.message(msg)
        jobsPerTask = 0
    if not eventsDefined and jobsPerTask==0:
        msg = 'Warning. '
        msg += 'job_number_of_events and total_number_of_jobs are not defined '
        msg += 'Creating just one job for all events.'
        common.logger.message(msg)
        jobsPerTask = 1

    # first case: events per job defined
    if eventsDefined:
        if allEventsInOneJob or (eventPerJob>tot_num_events and tot_num_events>0):
            common.logger.message("Asking more events than available: set it to maximum available")
            job_num_events = tot_num_events
            tot_num_jobs = 1
        else:
            job_num_events = eventPerJob
            # ceiling of tot_num_events/job_num_events
            tot_num_jobs = (tot_num_events-1)//job_num_events + 1

    elif jobsPerTask>0:
        common.logger.debug(2,"total number of events: "+str(tot_num_events)+" JobPerTask "+str(jobsPerTask))
        job_num_events = tot_num_events//jobsPerTask
        tot_num_jobs = jobsPerTask

    # should not happen...
    else:
        raise CrabException('Something wrong with splitting')

    common.logger.debug(2,"total number of events: "+str(tot_num_events)+" events per job: "+str(job_num_events))

    #used by jobsToDB for logs
    self.job_number_of_events = job_num_events
    self.total_number_of_jobs = tot_num_jobs
    return tot_num_jobs
287    
288    
def jobsToDB(self, nJobs):
    """
    Fill the DB with proper entries for ORCA-DBS-DLS

    For each of the nJobs jobs the first event and the number of
    events are stored in common.jobDB; the last job receives the events
    left over from self.total_number_of_events.  A summary line is
    logged at the end.
    """
    per_job = self.job_number_of_events
    last = nJobs - 1

    # all the jobs but the last one have the same size
    start = self.first_event
    for idx in range(last):
        common.jobDB.setFirstEvent(idx, start)
        common.jobDB.setMaxEvents(idx, per_job)
        start += per_job

    # the last job takes the remaining events
    common.jobDB.setFirstEvent(last, start)
    remaining = (self.total_number_of_events + self.first_event) - start
    common.jobDB.setMaxEvents(last, remaining)

    grand_total = per_job * (self.total_number_of_jobs - 1) + remaining
    if remaining != per_job:
        summary = '%s jobs will be created for %s events each plus 1 for %s events for a total of %s events' % (
            self.total_number_of_jobs - 1, per_job, remaining, grand_total)
    else:
        summary = '%s jobs will be created for %s events each for a total of %s events' % (
            self.total_number_of_jobs, per_job, grand_total)
    common.logger.message(summary)

    return
314    
315    
def wsSetupEnvironment(self, nj):
    """
    Returns part of a job script which prepares
    the execution environment for the job 'nj'.

    The returned shell text, in order: sets up the middleware specific
    environment (LCG or OSG), creates the ORCA project area with scram,
    checks the wrapper arguments (job number, first event, max events),
    copies the .orcarc, the site init script and the additional input
    files from $RUNTIME_AREA, runs ./init.sh and finally appends the
    event range (and the MonALISA job id when self.ML is set) to
    .orcarc.  Every failure branch echoes an exit code and, on OSG,
    removes the working directory.
    """

    # Prepare JobType-independent part
    txt = ''

    ## OLI_Daniele at this level middleware already known

    txt += 'if [ $middleware == LCG ]; then \n'
    txt += self.wsSetupCMSLCGEnvironment_()
    txt += 'elif [ $middleware == OSG ]; then\n'
    txt += ' time=`date -u +"%s"`\n'
    txt += ' WORKING_DIR=$OSG_WN_TMP/cms_$time\n'
    txt += ' echo "Creating working directory: $WORKING_DIR"\n'
    txt += ' /bin/mkdir -p $WORKING_DIR\n'
    txt += ' if [ ! -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10016"\n'
    txt += ' echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' exit 1\n'
    txt += ' fi\n'
    txt += '\n'
    txt += ' echo "Change to working directory: $WORKING_DIR"\n'
    txt += ' cd $WORKING_DIR\n'
    txt += self.wsSetupCMSOSGEnvironment_()
    txt += 'fi\n'

    # Prepare JobType-specific part
    scram = self.scram.commandName()
    txt += '\n\n'
    txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
    txt += scram+' project ORCA '+self.version+'\n'
    txt += 'status=$?\n'
    txt += 'if [ $status != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 10034 ==>ERROR ORCA '+self.version+' not found on `hostname`" \n'
    txt += ' echo "JOB_EXIT_STATUS = 10034"\n'
    txt += ' echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'

    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    # the message names the ORCA release that was actually requested
    # (it used to carry a hard-coded, unrelated CMSSW version)
    txt += ' echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after ORCA '+self.version+' not found on `hostname`"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10018"\n'
    txt += ' echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' \n'
    txt += ' exit 1\n'
    txt += 'fi \n'
    txt += 'echo "ORCA_VERSION = '+self.version+'"\n'
    txt += 'cd '+self.version+'\n'
    ### needed grep for bug in scramv1 ###

    #txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'

    # Handle the arguments:
    txt += "\n"
    txt += "## ARGUMNETS: $1 Job Number\n"
    txt += "## ARGUMNETS: $2 First Event for this job\n"
    txt += "## ARGUMNETS: $3 Max Event for this job\n"
    txt += "\n"
    txt += "narg=$#\n"
    txt += "NJob=$1\n"
    txt += "FirstEvent=$2\n"
    txt += "MaxEvents=$3\n"
    txt += "if [ $narg -lt 3 ]\n"
    txt += "then\n"
    txt += " echo 'SET_EXE_ENV 50113 ==> ERROR Too few arguments' +$narg+ \n"
    txt += ' echo "JOB_EXIT_STATUS = 50113"\n'
    txt += ' echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'

    ## OLI_Daniele
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
    txt += ' echo "JOB_EXIT_STATUS = 50114"\n'
    txt += ' echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' \n'
    txt += " exit 1\n"
    txt += "fi\n"
    txt += "\n"

    # Prepare job-specific part
    job = common.job_list[nj]
    orcarc = os.path.basename(job.configFilename())
    txt += '\n'
    txt += 'cp $RUNTIME_AREA/'+orcarc+' .orcarc\n'
    # site specific cards, when present, are put in front of the user ones
    txt += 'if [ -e $RUNTIME_AREA/orcarc_$CE ] ; then\n'
    txt += ' cat $RUNTIME_AREA/orcarc_$CE .orcarc >> .orcarc_tmp\n'
    txt += ' mv .orcarc_tmp .orcarc\n'
    txt += 'fi\n'
    txt += 'if [ -e $RUNTIME_AREA/init_$CE.sh ] ; then\n'
    txt += ' cp $RUNTIME_AREA/init_$CE.sh init.sh\n'
    txt += 'fi\n'

    if len(self.additional_inbox_files) > 0:
        for file in self.additional_inbox_files:
            file = os.path.basename(file)
            txt += 'if [ -e $RUNTIME_AREA/'+file+' ] ; then\n'
            txt += ' cp $RUNTIME_AREA/'+file+' .\n'
            txt += ' chmod +x '+file+'\n'
            txt += 'fi\n'
        pass

    txt += '\n'
    txt += 'chmod +x ./init.sh\n'
    txt += './init.sh\n'
    txt += 'exitStatus=$?\n'
    txt += 'if [ $exitStatus != 0 ] ; then\n'
    txt += ' echo "SET_EXE_ENV 20001 ==> ERROR StageIn init script failed"\n'
    txt += ' echo "JOB_EXIT_STATUS = $exitStatus" \n'
    txt += ' echo "JobExitCode=20001" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'

    ### OLI_DANIELE
    txt += ' if [ $middleware == OSG ]; then \n'
    txt += ' echo "Remove working directory: $WORKING_DIR"\n'
    txt += ' cd $RUNTIME_AREA\n'
    txt += ' /bin/rm -rf $WORKING_DIR\n'
    txt += ' if [ -d $WORKING_DIR ] ;then\n'
    txt += ' echo "SET_EXE 10012 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after StageIn init script failed"\n'
    txt += ' echo "JOB_EXIT_STATUS = 10012"\n'
    txt += ' echo "JobExitCode=10012" | tee -a $RUNTIME_AREA/$repo\n'
    txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
    txt += ' fi\n'
    txt += ' fi \n'
    txt += ' exit 1\n'
    txt += 'fi\n'
    txt += "echo 'SET_EXE_ENV 0 ==> job setup ok'\n"
    txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'

    # the event range of this job is passed to ORCA through .orcarc
    txt += 'echo "FirstEvent=$FirstEvent" >> .orcarc\n'
    txt += 'echo "MaxEvents=$MaxEvents" >> .orcarc\n'
    if self.ML:
        txt += 'echo "MonalisaJobId=$NJob" >> .orcarc\n'

    txt += '\n'
    txt += 'echo "***** cat .orcarc *********"\n'
    txt += 'cat .orcarc\n'
    txt += 'echo "****** end .orcarc ********"\n'
    return txt
474    
def wsBuildExe(self, nj):
    """
    Put in the script the commands to build an executable
    or a library.

    When the tarball self.tgzNameWithPath exists on the submitting
    machine, the returned shell text untars it from $RUNTIME_AREA and
    aborts the job if that fails; in every case it ends by evaluating
    the scram runtime environment.  The job index 'nj' is not used.
    """

    txt = ""

    if os.path.isfile(self.tgzNameWithPath):
        tarball = os.path.basename(self.tgzNameWithPath)
        txt += 'echo "tar xzvf $RUNTIME_AREA/'+tarball+'"\n'
        txt += 'tar xzvf $RUNTIME_AREA/'+tarball+'\n'
        txt += 'untar_status=$? \n'
        txt += 'if [ $untar_status -ne 0 ]; then \n'
        txt += ' echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = $untar_status" \n'
        # the report file lives in $RUNTIME_AREA, not in the directory
        # the script has changed into by now
        txt += ' echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'

        ### OLI_DANIELE
        txt += ' if [ $middleware == OSG ]; then \n'
        txt += ' echo "Remove working directory: $WORKING_DIR"\n'
        txt += ' cd $RUNTIME_AREA\n'
        txt += ' /bin/rm -rf $WORKING_DIR\n'
        txt += ' if [ -d $WORKING_DIR ] ;then\n'
        txt += ' echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
        txt += ' echo "JOB_EXIT_STATUS = 50999"\n'
        txt += ' echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
        txt += ' dumpStatus $RUNTIME_AREA/$repo\n'
        txt += ' fi\n'
        txt += ' fi \n'
        txt += ' \n'
        txt += ' exit 1 \n'
        txt += 'else \n'
        txt += ' echo "Successful untar" \n'
        txt += 'fi \n'
        # TODO: what does this code do here ?
        # SL check that lib/Linux__... is present
        txt += 'mkdir -p lib/${SCRAM_ARCH} \n'
        pass
    txt += 'eval `'+self.scram.commandName()+' runtime -sh |grep -v SCRAMRT_LSB_JOBNAME`'+'\n'

    return txt
516    
def wsRenameOutput(self, nj):
    """
    Returns part of a job script which renames the produced files.

    Every file of self.output_file is copied to $RUNTIME_AREA under
    the name given by numberFile_ with the '$NJob' suffix; a missing
    file is reported with exit code 60302.  The renamed names are
    exported in the shell variable file_list and, on OSG, the working
    directory is removed.  The job index 'nj' is not used.
    """
    pieces = ['\n', '# directory content\n', 'ls \n']
    renamed = []
    for produced in self.output_file:
        numbered = self.numberFile_(produced, '$NJob')
        renamed.append(numbered)
        pieces.append('\n')
        pieces.append('ls '+produced+'\n')
        pieces.append('exe_result=$?\n')
        pieces.append('if [ $exe_result -ne 0 ] ; then\n')
        pieces.append(' echo "ERROR: No output file to manage"\n')
        pieces.append(' echo "JOB_EXIT_STATUS = $exe_result"\n')
        pieces.append(' echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n')
        pieces.append(' dumpStatus $RUNTIME_AREA/$repo\n')

        ### OLI_DANIELE
        if common.scheduler.boss_scheduler_name == 'condor_g':
            pieces.append(' if [ $middleware == OSG ]; then \n')
            pieces.append(' echo "prepare dummy output file"\n')
            pieces.append(' echo "Processing of job output failed" > $RUNTIME_AREA/'+numbered+'\n')
            pieces.append(' fi \n')
        pieces.append('else\n')
        pieces.append(' cp '+produced+' $RUNTIME_AREA/'+numbered+'\n')
        pieces.append('fi\n')

    pieces.append('cd $RUNTIME_AREA\n')
    # blank separated list of the renamed files
    pieces.append('file_list="'+' '.join(renamed)+'"\n')
    ### OLI_DANIELE
    pieces.append('if [ $middleware == OSG ]; then\n')
    pieces.append(' cd $RUNTIME_AREA\n')
    pieces.append(' echo "Remove working directory: $WORKING_DIR"\n')
    pieces.append(' /bin/rm -rf $WORKING_DIR\n')
    pieces.append(' if [ -d $WORKING_DIR ] ;then\n')
    pieces.append(' echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n')
    pieces.append(' echo "JOB_EXIT_STATUS = 60999"\n')
    pieces.append(' echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n')
    pieces.append(' dumpStatus $RUNTIME_AREA/$repo\n')
    pieces.append(' fi\n')
    pieces.append('fi\n')
    pieces.append('\n')

    return ''.join(pieces)
569    
def executableName(self):
    """
    Return the command the job has to run: the user script
    (self.scriptExe, taken from the current directory) when one is
    defined, otherwise the configured executable.
    """
    if self.scriptExe == '':
        return self.executable
    script = os.path.basename(self.scriptExe)
    return "./" + script
575    
def connectPubDB(self, cfg_params):
    """
    Collect the orcarc descriptions of the sites publishing the
    requested owner/dataset and derive the list of target CEs.

    If the share directory already contains 'PubDBSummaryFile' the
    descriptions are rebuilt from it; otherwise PubDB is queried, the
    orcarc and init files are created in the job directory, the sites
    with enough events are kept and the summary file is written.

    Sets self.allOrcarcs, self.maxEvents (largest number of events of a
    kept site), common.analisys_common_info['sites'] and the 'TargetCE'
    parameter.

    Raises CrabException when PubDB cannot be accessed, nothing is
    published, or no site survives the selection.
    """

    fun = "Orca_pubdb::connectPubDB()"

    self.allOrcarcs = []
    summaryName = common.work_space.shareDir()+'PubDBSummaryFile'
    # first check if the info from PubDB have been already processed
    if os.path.exists(summaryName) :
        common.logger.debug(6, fun+": info from PubDB has been already processed -- use it")
        f = open( summaryName, 'r' )
        try:
            for i in f.readlines():
                # each line ends with ' \n': the last field is dropped
                a=i.split(' ')
                self.allOrcarcs.append(orcarcBuilderOld.constructFromFile(a[0:-1]))
        finally:
            f.close()
        for o in self.allOrcarcs:
            # o.dump()
            # compare as numbers, like the freshly-queried branch below
            # NOTE(review): assumes Nevents read back from the file is
            # convertible with int() -- confirm against orcarcBuilderOld
            if int(o.Nevents) >= self.maxEvents:
                self.maxEvents= int(o.Nevents)

    else: # PubDB never queried
        common.logger.debug(6, fun+": PubDB was never queried -- do it")
        # New PubDB class by SL
        try:
            self.pubdb = PubDB.PubDB(self.owner,
                                     self.dataset,
                                     self.dataTiers,
                                     cfg_params)
        except PubDB.RefDBmapError:
            msg = 'ERROR ***: accessing PubDB'
            raise CrabException(msg)

        ## extract info from pubDB (grouped by PubDB version :
        ## pubDBData contains a list of info for the new-style PubDBs,
        ## and a list of info for the old-style PubDBs )
        self.pubDBData = self.pubdb.getAllPubDBData()

        ## check and exit if no data are published in any PubDB
        nodata=1
        for PubDBversion in self.pubDBData.keys():
            if len(self.pubDBData[PubDBversion])>0:
                nodata=0
        if (nodata):
            msg = 'Owner '+self.owner+' Dataset '+ self.dataset+ ' not published in any PubDB with asked dataTiers '+'-'.join(self.dataTiers)+' ! '
            raise CrabException(msg)

        ## logging PubDB content for debugging
        for PubDBversion in self.pubDBData.keys():
            common.logger.debug(6, fun+": PubDB "+PubDBversion+" info ("+repr(len(self.pubDBData[PubDBversion]))+"):\\/")
            for aa in self.pubDBData[PubDBversion]:
                common.logger.debug(6, "---------- start of a PubDB")
                for bb in aa:
                    if common.logger.debugLevel() >= 6 :
                        common.logger.debug(6, str(bb.dump()))
                common.logger.debug(6, "----------- end of a PubDB")
            common.logger.debug(6, fun+": End of PubDB "+PubDBversion+" info\n")

        ## building orcarc : switch between info from old and new-style PubDB
        # the builders run inside the job directory; always go back
        currDir = os.getcwd()
        os.chdir(common.work_space.jobDir())
        try:
            tmpOrcarcList=[]
            for PubDBversion in self.pubDBData.keys():
                if len(self.pubDBData[PubDBversion])>0 :
                    #print (" PubDB-style : %s"%(PubDBversion))
                    if PubDBversion=='newPubDB' :
                        self.builder = orcarcBuilder.orcarcBuilder(cfg_params)
                    else :
                        self.builder = orcarcBuilderOld.orcarcBuilderOld()
                    tmpAllOrcarcs = self.builder.createOrcarcAndInit(self.pubDBData[PubDBversion])
                    tmpOrcarcList.append(tmpAllOrcarcs)
                    #print 'version ',PubDBversion,' tmpAllOrcarcs ', tmpAllOrcarcs
        finally:
            os.chdir(currDir)

        # keep the sites with enough events (all of them when -1 is
        # requested) and set maximum number of event available
        self.maxEvents=0
        numEvReq=int(self.total_number_of_events)
        for tmpAllOrcarcs in tmpOrcarcList:
            for o in tmpAllOrcarcs:
                available = int(o.Nevents)
                if numEvReq == -1 or numEvReq <= available:
                    self.allOrcarcs.append(o)
                    if available >= self.maxEvents:
                        self.maxEvents= available

        # I save to a file self.allOrcarcs
        PubDBSummaryFile = open(summaryName,'w')
        try:
            for o in self.allOrcarcs:
                for d in o.content():
                    PubDBSummaryFile.write(d)
                    PubDBSummaryFile.write(' ')
                PubDBSummaryFile.write('\n')
        finally:
            PubDBSummaryFile.close()
        ### fede
        #for o in self.allOrcarcs:
        # o.dump()

    # build a list of sites
    ces= []
    for o in self.allOrcarcs:
        ces.append(o.CE)

    if len(ces)==0:
        msg = 'No PubDBs publish correct catalogs or enough events! '
        msg += repr(self.total_number_of_events)
        raise CrabException(msg)

    common.logger.debug(6, "List of CEs: "+str(ces))
    common.analisys_common_info['sites'] = ces
    self.setParam_('TargetCE', ','.join(ces))

    return
702    
def nJobs(self):
    """ return the number of job to be created """
    # TODO: should not be here !
    # JobType should have no internal knowledge about submitted jobs
    # One possibility is to use len(common.job_list).
    jobs = common.job_list
    return len(jobs)
709    
def prepareSteeringCards(self):
    """
    modify the orcarc card provided by the user,
    writing a new card into share dir

    The user card self.orcarc_file is copied to
    <jobDir><name>.orcarc without the cards that CRAB controls
    itself (listed in wordRemove); the CRAB values of some of them
    are appended at the end.  When the user card cannot be opened an
    empty 'empty.orcarc' is created in the current directory and used
    instead (self.orcarc_file is updated accordingly).
    """
    try:
        infile = open(self.orcarc_file,'r')
    except IOError:
        self.orcarc_file = 'empty.orcarc'
        cmd='touch '+self.orcarc_file
        runCommand(cmd)
        infile = open(self.orcarc_file,'r')

    try:
        inline=infile.readlines()
    finally:
        infile.close()

    ### remove from user card these lines ###
    wordRemove=['InputFileCatalogURL', 'InputCollections', 'FirstEvent', 'MaxEvents', 'TFileAdaptor', 'MonRecAlisaBuilder']

    outfile = open(common.work_space.jobDir()+self.name()+'.orcarc', 'w')
    try:
        for line in inline:
            # the card name is whatever precedes the first '='
            word = line.split('=')[0].strip()
            if word not in wordRemove:
                outfile.write(line)

        outfile.write('\n\n##### The following cards have been created by CRAB: DO NOT TOUCH #####\n')
        outfile.write('TFileAdaptor = true\n')

        outfile.write('MonRecAlisaBuilder=false\n')

        outfile.write('InputCollections=/System/'+self.owner+'/'+self.dataset+'/'+self.dataset+'\n')
    finally:
        outfile.close()
    return
748    
def modifySteeringCards(self, nj):
    """
    Creates steering cards file modifying a template file

    Nothing is done for this job type: the method returns None
    whatever the job index 'nj' is.
    """
    return None
754    
def cardsBaseName(self):
    """
    Returns name of user orcarc card-file
    (self.orcarc_file without its directory part).
    """
    directory, filename = os.path.split(self.orcarc_file)
    return filename
760    
761     ### content of input_sandbox ...
def inputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL input sandbox.

    The list holds the code tarball (when the file exists), the files
    of every entry of self.allOrcarcs (taken from the job directory,
    each name only once) and the config file of job 'nj'.
    """
    sandbox = []
    ## code
    if os.path.isfile(self.tgzNameWithPath):
        sandbox.append(self.tgzNameWithPath)
    ## orcarc
    # names already listed, to avoid duplicates in the sandbox
    listed = {}
    for orcarc in self.allOrcarcs:
        for fname in orcarc.fileList():
            if fname in listed:
                continue
            sandbox.append(common.work_space.jobDir()+fname)
            listed[fname] = 1

    ## config
    sandbox.append(common.job_list[nj].configFilename())
    ## additional input files
    #inp_box = inp_box + self.additional_inbox_files
    return sandbox
784    
785     ### and of output_sandbox
def outputSandbox(self, nj):
    """
    Returns a list of filenames to be put in JDL output sandbox.

    These are the user declared output files, each one numbered with
    nj+1 through numberFile_.  The job's stdout/stderr names are
    queried but not added to the list.
    """
    common.job_list[nj].stdout()
    common.job_list[nj].stderr()
    #out_box.append(stdout)
    #out_box.append(stderr)

    ## User Declared output files
    suffix = str(nj + 1)
    #FEDE
    #out_box.append(self.version+'/'+self.numberFile_(out,str(n_out)))
    return [self.numberFile_(declared, suffix) for declared in self.output_file]
804    
def getRequirements(self):
    """
    return job requirements to add to jdl files

    The result is empty when common.analisys_common_info['sites'] is
    empty.  Otherwise it is the software-version clause (only when
    'sw_version' is set) and the alternatives on the CE host names,
    joined by ' && ' so that no dangling operator is produced when the
    version clause is missing.
    """
    sites = common.analisys_common_info['sites']
    if not sites:
        return ''

    clauses = []
    sw_version = common.analisys_common_info['sw_version']
    if sw_version:
        clauses.append('Member("VO-cms-' + sw_version +
                       '", other.GlueHostApplicationSoftwareRunTimeEnvironment)')

    # any of the selected CEs is acceptable
    hosts = ['other.GlueCEInfoHostName == "' + site + '"' for site in sites]
    clauses.append('(' + ' || '.join(hosts) + ')')

    return ' && '.join(clauses)
825    
def numberFile_(self, file, txt):
    """
    append _'txt' before last extension of a file

    file is split on '.'; the text after the last dot is treated as the
    extension.  Both arguments must be strings.

      numberFile_('out.root', '3')  -> 'out_3.root'
      numberFile_('a.b.c', '1')     -> 'a.b_1.c'
      numberFile_('noext', '2')     -> 'noext_2'
    """
    # str methods replace the deprecated string-module functions
    parts = file.split(".")
    if len(parts) == 1:
        # no extension at all: just append "_txt"
        return file + '_' + txt

    # everything before the last dot, re-assembled
    name = ".".join(parts[:-1])
    ext = parts[-1]
    return name + '_' + txt + "." + ext
845    
846    
def stdOut(self):
    """Return the value stored in self.stdOut_."""
    return self.stdOut_
849    
def stdErr(self):
    """Return the value stored in self.stdErr_."""
    return self.stdErr_
852    
853     # marco
def setParam_(self, param, value):
    """Store value under the key param in the self._params mapping."""
    params = self._params
    params[param] = value
856    
def getParams(self):
    """Return the self._params mapping itself (not a copy)."""
    return self._params
859    
def setTaskid_(self):
    """Copy the 'taskId' entry of self.cfg_params into self._taskId."""
    task_id = self.cfg_params['taskId']
    self._taskId = task_id
862    
def getTaskid(self):
    """Return the value stored in self._taskId."""
    return self._taskId
865     # marco
866    
def configFilename(self):
    """ return the config filename: self.name() followed by '.orcarc' """
    base = self.name()
    return base + '.orcarc'
870    
871     ### OLI_DANIELE
def wsSetupCMSOSGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS software
    environment on OSG.

    The generated shell text sources cmsset_default.sh from
    $GRID3_APP_DIR/cmssoft or, failing that, from $OSG_APP/cmssoft,
    passing self.version as argument.  If neither file exists the script
    reports code 10020 and exits with status 1.
    """
    # NOTE(review): in the generated script the "Remove working directory"
    # section comes after an 'exit 1' inside the same else-branch, so it is
    # never reached.  Left untouched here on purpose.
    pieces = [
        '\n',
        ' echo "### SETUP CMS OSG ENVIRONMENT ###"\n',
        ' if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n',
        ' # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n',
        ' source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n',
        ' elif [ -f $OSG_APP/cmssoft/cmsset_default.sh ] ;then\n',
        ' # Use $OSG_APP/cmssoft/cmsset_default.sh to setup cms software\n',
        ' source $OSG_APP/cmssoft/cmsset_default.sh '+self.version+'\n',
        ' else\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        '\n',
        ' echo "Remove working directory: $WORKING_DIR"\n',
        ' cd $RUNTIME_AREA\n',
        ' /bin/rm -rf $WORKING_DIR\n',
        ' if [ -d $WORKING_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cmsset_default.sh file not found"\n',
        ' echo "JOB_EXIT_STATUS = 10017"\n',
        ' echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' fi\n',
        '\n',
        ' exit 1\n',
        ' fi\n',
        '\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo " END SETUP CMS OSG ENVIRONMENT "\n',
    ]
    return ''.join(pieces)
909    
910     ### OLI_DANIELE
def wsSetupCMSLCGEnvironment_(self):
    """
    Returns the part of a job script which prepares the CMS software
    environment on LCG.

    The generated shell text checks $VO_CMS_SW_DIR, sources
    $VO_CMS_SW_DIR/cmsset_default.sh and then inspects
    /etc/redhat-release to decide whether SCRAM_ARCH has to be exported.
    Each failure path echoes its own JobExitCode (10031, 10020, 10032,
    10033) and exits with status 1.
    """
    pieces = [
        ' \n',
        ' echo " ### SETUP CMS LCG ENVIRONMENT ### "\n',
        ' echo "JOB_EXIT_STATUS = 0"\n',
        # --- locate and source the CMS software setup script
        ' if [ ! $VO_CMS_SW_DIR ] ;then\n',
        ' echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n',
        ' echo "JOB_EXIT_STATUS = 10031" \n',
        ' echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' else\n',
        ' echo "Sourcing environment... "\n',
        ' if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n',
        ' echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n',
        ' echo "JOB_EXIT_STATUS = 10020"\n',
        ' echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' source $VO_CMS_SW_DIR/cmsset_default.sh\n',
        ' result=$?\n',
        ' if [ $result -ne 0 ]; then\n',
        ' echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n',
        ' echo "JOB_EXIT_STATUS = 10032"\n',
        ' echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' fi\n',
        ' \n',
        # --- pick SCRAM_ARCH from the content of /etc/redhat-release
        ' string=`cat /etc/redhat-release`\n',
        ' echo $string\n',
        ' if [[ $string = *alhalla* ]]; then\n',
        ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n',
        ' elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n',
        ' export SCRAM_ARCH=slc3_ia32_gcc323\n',
        ' echo "SCRAM_ARCH= $SCRAM_ARCH"\n',
        ' else\n',
        ' echo "SET_CMS_ENV 1 ==> ERROR OS unknown, LCG environment not initialized"\n',
        ' echo "JOB_EXIT_STATUS = 10033"\n',
        ' echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n',
        ' dumpStatus $RUNTIME_AREA/$repo\n',
        ' exit 1\n',
        ' fi\n',
        ' echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n',
        ' echo "### END SETUP CMS LCG ENVIRONMENT ###"\n',
    ]
    return ''.join(pieces)
963