ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.52 by slacapra, Tue Oct 17 11:54:02 2006 UTC vs.
Revision 1.365 by spiga, Tue Nov 9 21:10:07 2010 UTC

# Line 1 | Line 1
1 +
2 + __revision__ = "$Id$"
3 + __version__ = "$Revision$"
4 +
5   from JobType import JobType
2 from crab_logger import Logger
6   from crab_exceptions import *
7   from crab_util import *
5 import math
8   import common
9 < import PsetManipulator  
8 <
9 < import DBSInfo
10 < import DataDiscovery
11 < import DataLocation
9 > import re
10   import Scram
11 <
12 < import glob, os, string, re
11 > from Splitter import JobSplitter
12 > from Downloader import Downloader
13 > try:
14 >    import json
15 > except:
16 >    import simplejson as json
17 >
18 > from IMProv.IMProvNode import IMProvNode
19 > from IMProv.IMProvLoader import loadIMProvFile
20 > import os, string, glob
21 > from xml.dom import pulldom
22  
23   class Cmssw(JobType):
24 <    def __init__(self, cfg_params, ncjobs):
24 >    def __init__(self, cfg_params, ncjobs,skip_blocks, isNew):
25          JobType.__init__(self, 'CMSSW')
26 <        common.logger.debug(3,'CMSSW::__init__')
27 <
28 <        # Marco.
26 >        common.logger.debug('CMSSW::__init__')
27 >        self.skip_blocks = skip_blocks
28 >        self.argsList = 2
29 >        self.NumEvents=0
30          self._params = {}
31          self.cfg_params = cfg_params
32 +        ### FEDE FOR MULTI ###
33 +        self.var_filter=''
34 +
35 +        ### Temporary patch to automatically skip the ISB size check:
36 +        self.server = self.cfg_params.get('CRAB.server_name',None) or \
37 +                      self.cfg_params.get('CRAB.use_server',0)
38 +        self.local  = common.scheduler.name().upper() in ['LSF','CAF','CONDOR','SGE','PBS']
39 +        size = 9.5
40 +        if self.server :
41 +            size = 1000
42 +        elif self.local:
43 +            size = 9999999
44 +        self.MaxTarBallSize = float(self.cfg_params.get('GRID.maxtarballsize',size))
45  
46          # number of jobs requested to be created, limit obj splitting
47          self.ncjobs = ncjobs
48  
28        log = common.logger
29        
49          self.scram = Scram.Scram(cfg_params)
31        scramArea = ''
50          self.additional_inbox_files = []
51          self.scriptExe = ''
52          self.executable = ''
53 +        self.executable_arch = self.scram.getArch()
54          self.tgz_name = 'default.tgz'
55 <        self.pset = ''      #scrip use case Da  
56 <        self.datasetPath = '' #scrip use case Da
55 >        self.scriptName = 'CMSSW.sh'
56 >        self.pset = ''
57 >        self.datasetPath = ''
58  
59 +        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
60          # set FJR file name
61          self.fjrFileName = 'crab_fjr.xml'
62  
63          self.version = self.scram.getSWVersion()
64 <        self.setParam_('application', self.version)
65 <
66 <        ### collect Data cards
64 >        common.logger.log(10-1,"CMSSW version is: "+str(self.version))
65 >        version_array = self.version.split('_')
66 >        self.CMSSW_major = 0
67 >        self.CMSSW_minor = 0
68 >        self.CMSSW_patch = 0
69          try:
70 <            tmp =  cfg_params['CMSSW.datasetpath']
71 <            log.debug(6, "CMSSW::CMSSW(): datasetPath = "+tmp)
72 <            if string.lower(tmp)=='none':
73 <                self.datasetPath = None
74 <                self.selectNoInput = 1
52 <            else:
53 <                self.datasetPath = tmp
54 <                self.selectNoInput = 0
55 <        except KeyError:
56 <            msg = "Error: datasetpath not defined "  
70 >            self.CMSSW_major = int(version_array[1])
71 >            self.CMSSW_minor = int(version_array[2])
72 >            self.CMSSW_patch = int(version_array[3])
73 >        except:
74 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
75              raise CrabException(msg)
76  
77 <        # ML monitoring
78 <        # split dataset path style: /PreProdR3Minbias/SIM/GEN-SIM
79 <        if not self.datasetPath:
80 <            self.setParam_('dataset', 'None')
81 <            self.setParam_('owner', 'None')
82 <        else:
83 <            datasetpath_split = self.datasetPath.split("/")
84 <            self.setParam_('dataset', datasetpath_split[1])
85 <            self.setParam_('owner', datasetpath_split[-1])
77 >        if self.CMSSW_major < 2 or (self.CMSSW_major == 2 and self.CMSSW_minor < 1):
78 >            msg = "CRAB supports CMSSW >= 2_1_x only. Use an older CRAB version."
79 >            raise CrabException(msg)
80 >            """
81 >            As CMSSW versions are dropped we can drop more code:
82 >            2.x dropped: drop check for lumi range setting
83 >            """
84 >        self.checkCMSSWVersion()
85 >        ### collect Data cards
86  
87 <        self.setTaskid_()
88 <        self.setParam_('taskId', self.cfg_params['taskId'])
87 >        ### Temporary: added to remove input file control in the case of PU
88 >        self.dataset_pu = cfg_params.get('CMSSW.dataset_pu', None)
89  
90 <        self.dataTiers = []
90 >        tmp =  cfg_params['CMSSW.datasetpath']
91 >        common.logger.log(10-1, "CMSSW::CMSSW(): datasetPath = "+tmp)
92 >
93 >        if tmp =='':
94 >            msg = "Error: datasetpath not defined "
95 >            raise CrabException(msg)
96 >        elif string.lower(tmp)=='none':
97 >            self.datasetPath = None
98 >            self.selectNoInput = 1
99 >            self.primaryDataset = 'null'
100 >        else:
101 >            self.datasetPath = tmp
102 >            self.selectNoInput = 0
103 >            ll = len(self.datasetPath.split("/"))
104 >            if (ll < 4):
105 >                msg = 'Your datasetpath has a invalid format ' + self.datasetPath + '\n'
106 >                msg += 'Expected a path in format /PRIMARY/PROCESSED/TIER1-TIER2 or /PRIMARY/PROCESSED/TIER/METHOD for ADS'
107 >                raise CrabException(msg)
108 >            self.primaryDataset = self.datasetPath.split("/")[1]
109 >            self.dataTier = self.datasetPath.split("/")[2]
110 >
111 >        # An analysis dataset (ADS) path has the form /primary/processed/tier/definition
112 >        self.ads = False
113 >        if self.datasetPath:
114 >            self.ads = len(self.datasetPath.split("/")) > 4
115 >        self.lumiMask = self.cfg_params.get('CMSSW.lumi_mask',None)
116 >        self.lumiParams = self.cfg_params.get('CMSSW.total_number_of_lumis',None) or \
117 >                          self.cfg_params.get('CMSSW.lumis_per_job',None)
118 >
119 >        # FUTURE: Can remove this check
120 >        if self.ads and self.CMSSW_major < 3:
121 >            common.logger.info('Warning: Analysis dataset support is incomplete in CMSSW 2_x.')
122 >            common.logger.info('  Only file level, not lumi level, granularity is supported.')
123 >
124 >        self.debugWrap=''
125 >        self.debug_wrapper = int(cfg_params.get('USER.debug_wrapper',0))
126 >        if self.debug_wrapper == 1: self.debugWrap='--debug'
127  
128          ## now the application
129 <        try:
130 <            self.executable = cfg_params['CMSSW.executable']
131 <            self.setParam_('exe', self.executable)
132 <            log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
79 <            msg = "Default executable cmsRun overridden. Switch to " + self.executable
80 <            log.debug(3,msg)
81 <        except KeyError:
82 <            self.executable = 'cmsRun'
83 <            self.setParam_('exe', self.executable)
84 <            msg = "User executable not defined. Use cmsRun"
85 <            log.debug(3,msg)
86 <            pass
129 >        self.managedGenerators = ['madgraph', 'comphep', 'lhe']
130 >        self.generator = cfg_params.get('CMSSW.generator','pythia').lower()
131 >        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
132 >        common.logger.log(10-1, "CMSSW::CMSSW(): executable = "+self.executable)
133  
134 <        try:
89 <            self.pset = cfg_params['CMSSW.pset']
90 <            log.debug(6, "Cmssw::Cmssw(): PSet file = "+self.pset)
91 <            if self.pset.lower() != 'none' :
92 <                if (not os.path.exists(self.pset)):
93 <                    raise CrabException("User defined PSet file "+self.pset+" does not exist")
94 <            else:
95 <                self.pset = None
96 <        except KeyError:
134 >        if not cfg_params.has_key('CMSSW.pset'):
135              raise CrabException("PSet file missing. Cannot run cmsRun ")
136 +        self.pset = cfg_params['CMSSW.pset']
137 +        common.logger.log(10-1, "Cmssw::Cmssw(): PSet file = "+self.pset)
138 +        if self.pset.lower() != 'none' :
139 +            if (not os.path.exists(self.pset)):
140 +                raise CrabException("User defined PSet file "+self.pset+" does not exist")
141 +        else:
142 +            self.pset = None
143  
144          # output files
145 <        try:
146 <            self.output_file = []
145 >        ## stuff which must be returned always via sandbox
146 >        self.output_file_sandbox = []
147  
148 <            # add fjr report by default
149 <            self.output_file.append(self.fjrFileName)
148 >        # add fjr report by default via sandbox
149 >        self.output_file_sandbox.append(self.fjrFileName)
150  
151 <            tmp = cfg_params['CMSSW.output_file']
152 <            if tmp != '':
153 <                tmpOutFiles = string.split(cfg_params['CMSSW.output_file'],',')
154 <                log.debug(7, 'cmssw::cmssw(): output files '+str(tmpOutFiles))
155 <                for tmp in tmpOutFiles:
156 <                    tmp=string.strip(tmp)
157 <                    self.output_file.append(tmp)
158 <                    pass
159 <            else:
160 <                log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
161 <                pass
162 <            pass
163 <        except KeyError:
164 <            log.message("No output file defined: only stdout/err and the CRAB Framework Job Report will be available")
165 <            pass
151 >        # other output files to be returned via sandbox or copied to SE
152 >        outfileflag = False
153 >        self.output_file = []
154 >        tmp = cfg_params.get('CMSSW.output_file',None)
155 >        if tmp :
156 >            self.output_file = [x.strip() for x in tmp.split(',')]
157 >            outfileflag = True #output found
158 >
159 >        self.scriptExe = cfg_params.get('USER.script_exe',None)
160 >        if self.scriptExe :
161 >            if not os.path.isfile(self.scriptExe):
162 >                msg ="ERROR. file "+self.scriptExe+" not found"
163 >                raise CrabException(msg)
164 >            self.additional_inbox_files.append(string.strip(self.scriptExe))
165 >
166 >        self.AdditionalArgs = cfg_params.get('USER.script_arguments',None)
167 >        if self.AdditionalArgs : self.AdditionalArgs = string.replace(self.AdditionalArgs,',',' ')
168  
122        # script_exe file as additional file in inputSandbox
123        try:
124            self.scriptExe = cfg_params['USER.script_exe']
125            if self.scriptExe != '':
126               if not os.path.isfile(self.scriptExe):
127                  msg ="WARNING. file "+self.scriptExe+" not found"
128                  raise CrabException(msg)
129               self.additional_inbox_files.append(string.strip(self.scriptExe))
130        except KeyError:
131            self.scriptExe = ''
132        #CarlosDaniele
169          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
170 <           msg ="WARNING. script_exe  not defined"
171 <           raise CrabException(msg)
170 >            msg ="Error. script_exe  not defined"
171 >            raise CrabException(msg)
172 >
173 >        # use parent files...
174 >        self.useParent = int(self.cfg_params.get('CMSSW.use_parent',0))
175  
176          ## additional input files
177 <        try:
177 >        if cfg_params.has_key('USER.additional_input_files'):
178              tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')
179              for tmp in tmpAddFiles:
180                  tmp = string.strip(tmp)
181                  dirname = ''
182                  if not tmp[0]=="/": dirname = "."
183 <                files = glob.glob(os.path.join(dirname, tmp))
183 >                files = []
184 >                if string.find(tmp,"*")>-1:
185 >                    files = glob.glob(os.path.join(dirname, tmp))
186 >                    if len(files)==0:
187 >                        raise CrabException("No additional input file found with this pattern: "+tmp)
188 >                else:
189 >                    files.append(tmp)
190                  for file in files:
191                      if not os.path.exists(file):
192                          raise CrabException("Additional input file not found: "+file)
# Line 149 | Line 194 | class Cmssw(JobType):
194                      self.additional_inbox_files.append(string.strip(file))
195                  pass
196              pass
197 <            common.logger.debug(5,"Additional input files: "+str(self.additional_inbox_files))
198 <        except KeyError:
154 <            pass
155 <
156 <        # files per job
157 <        try:
158 <            if (cfg_params['CMSSW.files_per_jobs']):
159 <                raise CrabException("files_per_jobs no longer supported.  Quitting.")
160 <        except KeyError:
161 <            pass
162 <
163 <        ## Events per job
164 <        try:
165 <            self.eventsPerJob =int( cfg_params['CMSSW.events_per_job'])
166 <            self.selectEventsPerJob = 1
167 <        except KeyError:
168 <            self.eventsPerJob = -1
169 <            self.selectEventsPerJob = 0
170 <    
171 <        ## number of jobs
172 <        try:
173 <            self.theNumberOfJobs =int( cfg_params['CMSSW.number_of_jobs'])
174 <            self.selectNumberOfJobs = 1
175 <        except KeyError:
176 <            self.theNumberOfJobs = 0
177 <            self.selectNumberOfJobs = 0
197 >            common.logger.debug("Additional input files: "+str(self.additional_inbox_files))
198 >        pass
199  
179        try:
180            self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
181            self.selectTotalNumberEvents = 1
182        except KeyError:
183            self.total_number_of_events = 0
184            self.selectTotalNumberEvents = 0
185
186        if self.pset != None: #CarlosDaniele
187             if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
188                 msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
189                 raise CrabException(msg)
190        else:
191             if (self.selectNumberOfJobs == 0):
192                 msg = 'Must specify  number_of_jobs.'
193                 raise CrabException(msg)
194
195        ## source seed for pythia
196        try:
197            self.sourceSeed = int(cfg_params['CMSSW.pythia_seed'])
198        except KeyError:
199            self.sourceSeed = None
200            common.logger.debug(5,"No seed given")
200  
201 <        try:
202 <            self.sourceSeedVtx = int(cfg_params['CMSSW.vtx_seed'])
203 <        except KeyError:
204 <            self.sourceSeedVtx = None
205 <            common.logger.debug(5,"No vertex seed given")
206 <        if self.pset != None: #CarlosDaniele
207 <            self.PsetEdit = PsetManipulator.PsetManipulator(self.pset) #Daniele Pset
201 >        ## New method of dealing with seeds
202 >        self.incrementSeeds = []
203 >        self.preserveSeeds = []
204 >        if cfg_params.has_key('CMSSW.preserve_seeds'):
205 >            tmpList = cfg_params['CMSSW.preserve_seeds'].split(',')
206 >            for tmp in tmpList:
207 >                tmp.strip()
208 >                self.preserveSeeds.append(tmp)
209 >        if cfg_params.has_key('CMSSW.increment_seeds'):
210 >            tmpList = cfg_params['CMSSW.increment_seeds'].split(',')
211 >            for tmp in tmpList:
212 >                tmp.strip()
213 >                self.incrementSeeds.append(tmp)
214 >
215 >        # Copy/return/publish
216 >        self.copy_data = int(cfg_params.get('USER.copy_data',0))
217 >        self.return_data = int(cfg_params.get('USER.return_data',0))
218 >        self.publish_data = int(cfg_params.get('USER.publish_data',0))
219 >        if (self.publish_data == 1):
220 >            if not cfg_params.has_key('USER.publish_data_name'):
221 >                raise CrabException('Cannot publish output data, because you did not specify USER.publish_data_name parameter in the crab.cfg file')
222 >            else:
223 >                self.processedDataset = cfg_params['USER.publish_data_name']
224  
225 +        self.conf = {}
226 +        self.conf['pubdata'] = None
227 +        # number of jobs requested to be created, limit obj splitting DD
228          #DBSDLS-start
229 <        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
229 >        ## Initialize the variables that are extracted from DBS/DLS and needed in other places of the code
230          self.maxEvents=0  # max events available   ( --> check the requested nb. of evts in Creator.py)
231          self.DBSPaths={}  # all dbs paths requested ( --> input to the site local discovery script)
232          self.jobDestination=[]  # Site destination(s) for each job (list of lists)
233          ## Perform the data location and discovery (based on DBS/DLS)
234          ## SL: Don't if NONE is specified as input (pythia use case)
235          blockSites = {}
236 <        if self.datasetPath:
237 <            blockSites = self.DataDiscoveryAndLocation(cfg_params)
238 <        #DBSDLS-end          
236 > #wmbs
237 >        self.automation = int(self.cfg_params.get('WMBS.automation',0))
238 >        if self.automation == 0:
239 >            if self.datasetPath:
240 >                blockSites = self.DataDiscoveryAndLocation(cfg_params)
241 >            #DBSDLS-end
242 >            self.conf['blockSites']=blockSites
243 >
244 >            ## Select Splitting
245 >            splitByRun = int(cfg_params.get('CMSSW.split_by_run',0))
246 >
247 >            if self.selectNoInput:
248 >                if self.pset == None:
249 >                    self.algo = 'ForScript'
250 >                else:
251 >                    self.algo = 'NoInput'
252 >                    self.conf['managedGenerators']=self.managedGenerators
253 >                    self.conf['generator']=self.generator
254 >            elif self.ads or self.lumiMask or self.lumiParams:
255 >                self.algo = 'LumiBased'
256 >                if splitByRun:
257 >                    msg = "Cannot combine split by run with lumi_mask, ADS, " \
258 >                          "or lumis_per_job. Use split by lumi mode instead."
259 >                    raise CrabException(msg)
260  
261 <        self.tgzNameWithPath = self.getTarBall(self.executable)
262 <    
224 <        ## Select Splitting
225 <        if self.selectNoInput:
226 <            if self.pset == None: #CarlosDaniele
227 <                self.jobSplittingForScript()
261 >            elif splitByRun ==1:
262 >                self.algo = 'RunBased'
263              else:
264 <                self.jobSplittingNoInput()
265 <        else: self.jobSplittingByBlocks(blockSites)
264 >                self.algo = 'EventBased'
265 >            common.logger.debug("Job splitting method: %s" % self.algo)
266  
267 <        # modify Pset
268 <        if self.pset != None: #CarlosDaniele
269 <            try:
270 <                if (self.datasetPath): # standard job
271 <                    # allow to processa a fraction of events in a file
272 <                    self.PsetEdit.inputModule("INPUT")
273 <                    self.PsetEdit.maxEvent("INPUTMAXEVENTS")
274 <                    self.PsetEdit.skipEvent("INPUTSKIPEVENTS")
275 <                else:  # pythia like job
276 <                    self.PsetEdit.maxEvent(self.eventsPerJob)
277 <                    if (self.sourceSeed) :
278 <                        self.PsetEdit.pythiaSeed("INPUT")
279 <                        if (self.sourceSeedVtx) :
280 <                            self.PsetEdit.pythiaSeedVtx("INPUTVTX")
281 <                # add FrameworkJobReport to parameter-set
282 <                self.PsetEdit.addCrabFJR(self.fjrFileName)
283 <                self.PsetEdit.psetWriter(self.configFilename())
284 <            except:
285 <                msg='Error while manipuliating ParameterSet: exiting...'
267 >            splitter = JobSplitter(self.cfg_params,self.conf)
268 >            self.dict = splitter.Algos()[self.algo]()
269 >
270 >        self.argsFile= '%s/arguments.xml'%common.work_space.shareDir()
271 >        self.rootArgsFilename= 'arguments'
272 >        # modify Pset only the first time
273 >        if isNew:
274 >            if self.pset != None: self.ModifyPset()
275 >
276 >            ## Prepare inputSandbox TarBall (only the first time)
277 >            self.tarNameWithPath = self.getTarBall(self.executable)
278 >
279 >
280 >    def ModifyPset(self):
281 >        import PsetManipulator as pp
282 >
283 >        # If pycfg_params set, fake out the config script
284 >        # to make it think it was called with those args
285 >        pycfg_params = self.cfg_params.get('CMSSW.pycfg_params',None)
286 >        if pycfg_params:
287 >            trueArgv = sys.argv
288 >            sys.argv = [self.pset]
289 >            sys.argv.extend(pycfg_params.split(' '))
290 >        PsetEdit = pp.PsetManipulator(self.pset)
291 >        if pycfg_params: # Restore original sys.argv
292 >            sys.argv = trueArgv
293 >
294 >        try:
295 >            # Add FrameworkJobReport to parameter-set, set max events.
296 >            # Reset later for data jobs by writeCFG which does all modifications
297 >            PsetEdit.maxEvent(1)
298 >            PsetEdit.skipEvent(0)
299 >            PsetEdit.psetWriter(self.configFilename())
300 >            ## If present, add TFileService to output files
301 >            if not int(self.cfg_params.get('CMSSW.skip_tfileservice_output',0)):
302 >                tfsOutput = PsetEdit.getTFileService()
303 >                if tfsOutput:
304 >                    if tfsOutput in self.output_file:
305 >                        common.logger.debug("Output from TFileService "+tfsOutput+" already in output files")
306 >                    else:
307 >                        outfileflag = True #output found
308 >                        self.output_file.append(tfsOutput)
309 >                        common.logger.info("Adding "+tfsOutput+" (from TFileService) to list of output files")
310 >                    pass
311 >                pass
312 >
313 >            # If requested, add PoolOutputModule to output files
314 >            ### FEDE FOR MULTI ###
315 >            #edmOutput = PsetEdit.getPoolOutputModule()
316 >            edmOutputDict = PsetEdit.getPoolOutputModule()
317 >            common.logger.debug("(test) edmOutputDict = "+str(edmOutputDict))
318 >            filter_dict = {}
319 >            for key in edmOutputDict.keys():
320 >                filter_dict[key]=edmOutputDict[key]['dataset']
321 >            common.logger.debug("(test) filter_dict for multi =  "+str(filter_dict))
322 >
323 >            #### in CMSSW.sh: export var_filter
324 >
325 >            self.var_filter = json.dumps(filter_dict)
326 >            common.logger.debug("(test) var_filter for multi =  "+self.var_filter)
327 >
328 >            edmOutput = edmOutputDict.keys()
329 >            if int(self.cfg_params.get('CMSSW.get_edm_output',0)):
330 >                if edmOutput:
331 >                    for outputFile in edmOutput:
332 >                        if outputFile in self.output_file:
333 >                            common.logger.debug("Output from PoolOutputModule "+outputFile+" already in output files")
334 >                        else:
335 >                            self.output_file.append(outputFile)
336 >                            common.logger.info("Adding "+outputFile+" (from PoolOutputModule) to list of output files")
337 >            # not requested, check anyhow to avoid accidental T2 overload
338 >            else:
339 >                if edmOutput:
340 >                    missedFiles = []
341 >                    for outputFile in edmOutput:
342 >                        if outputFile not in self.output_file:
343 >                            missedFiles.append(outputFile)
344 >                    if missedFiles:
345 >                        msg  = "ERROR: PoolOutputModule(s) are present in your ParameteSet %s \n"%self.pset
346 >                        msg += "    but the file(s) produced ( %s ) are not in the list of output files\n" % ', '.join(missedFiles)
347 >                        msg += "WARNING: please remove them. If you want to keep them, add the file(s) to output_files or use CMSSW.get_edm_output = 1\n"
348 >                        if int(self.cfg_params.get('CMSSW.ignore_edm_output',0)):
349 >                            msg += "    CMSSW.ignore_edm_output==1 : Hope you know what you are doing...\n"
350 >                            common.logger.info(msg)
351 >                        else :
352 >                            raise CrabException(msg)
353 >
354 >            if (PsetEdit.getBadFilesSetting()):
355 >                msg = "WARNING: You have set skipBadFiles to True. This will continue processing on some errors and you may not be notified."
356 >                common.logger.info(msg)
357 >
358 >        except CrabException, msg:
359 >            common.logger.info(str(msg))
360 >            msg='Error while manipulating ParameterSet (see previous message, if any): exiting...'
361 >            raise CrabException(msg)
362 >
363 >        valid = re.compile('^[\w\.\-]+$')
364 >        for fileName in self.output_file:
365 >            if not valid.match(fileName):
366 >                msg = "The file %s may only contain alphanumeric characters and -, _, ." % fileName
367                  raise CrabException(msg)
368  
369 +
370      def DataDiscoveryAndLocation(self, cfg_params):
371  
372 <        common.logger.debug(10,"CMSSW::DataDiscoveryAndLocation()")
372 >        import DataDiscovery
373 >        import DataLocation
374 >        common.logger.log(10-1,"CMSSW::DataDiscoveryAndLocation()")
375  
376          datasetPath=self.datasetPath
377  
259        ## TODO
260        dataTiersList = ""
261        dataTiers = dataTiersList.split(',')
262
378          ## Contact the DBS
379 <        common.logger.message("Contacting DBS...")
379 >        common.logger.info("Contacting Data Discovery Services ...")
380          try:
381 <            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, dataTiers, cfg_params)
381 >            self.pubdata=DataDiscovery.DataDiscovery(datasetPath, cfg_params,self.skip_blocks)
382              self.pubdata.fetchDBSInfo()
383  
384          except DataDiscovery.NotExistingDatasetError, ex :
385              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
386              raise CrabException(msg)
272
387          except DataDiscovery.NoDataTierinProvenanceError, ex :
388              msg = 'ERROR ***: failed Data Discovery in DBS : %s'%ex.getErrorMessage()
389              raise CrabException(msg)
390          except DataDiscovery.DataDiscoveryError, ex:
391 <            msg = 'ERROR ***: failed Data Discovery in DBS  %s'%ex.getErrorMessage()
391 >            msg = 'ERROR ***: failed Data Discovery in DBS :  %s'%ex.getErrorMessage()
392              raise CrabException(msg)
393  
280        ## get list of all required data in the form of dbs paths  (dbs path = /dataset/datatier/owner)
281        ## self.DBSPaths=self.pubdata.getDBSPaths()
282        common.logger.message("Required data are :"+self.datasetPath)
283
394          self.filesbyblock=self.pubdata.getFiles()
395 <        self.eventsbyblock=self.pubdata.getEventsPerBlock()
286 <        self.eventsbyfile=self.pubdata.getEventsPerFile()
287 <        # print str(self.filesbyblock)
288 <        # print 'self.eventsbyfile',len(self.eventsbyfile)
289 <        # print str(self.eventsbyfile)
395 >        self.conf['pubdata']=self.pubdata
396  
397          ## get max number of events
398 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
293 <        common.logger.message("The number of available events is %s\n"%self.maxEvents)
398 >        self.maxEvents=self.pubdata.getMaxEvents()
399  
295        common.logger.message("Contacting DLS...")
400          ## Contact the DLS and build a list of sites hosting the fileblocks
401          try:
402              dataloc=DataLocation.DataLocation(self.filesbyblock.keys(),cfg_params)
403              dataloc.fetchDLSInfo()
404 +
405          except DataLocation.DataLocationError , ex:
406              msg = 'ERROR ***: failed Data Location in DLS \n %s '%ex.getErrorMessage()
407              raise CrabException(msg)
303        
408  
409 <        sites = dataloc.getSites()
409 >
410 >        unsorted_sites = dataloc.getSites()
411 >        sites = self.filesbyblock.fromkeys(self.filesbyblock,'')
412 >        for lfn in self.filesbyblock.keys():
413 >            if unsorted_sites.has_key(lfn):
414 >                sites[lfn]=unsorted_sites[lfn]
415 >            else:
416 >                sites[lfn]=[]
417 >
418 >        if len(sites)==0:
419 >            msg = 'ERROR ***: no location for any of the blocks of this dataset: \n\t %s \n'%datasetPath
420 >            msg += "\tMaybe the dataset is located only at T1's (or at T0), where analysis jobs are not allowed\n"
421 >            msg += "\tPlease check DataDiscovery page https://cmsweb.cern.ch/dbs_discovery/\n"
422 >            raise CrabException(msg)
423 >
424          allSites = []
425          listSites = sites.values()
426 <        for list in listSites:
427 <            for oneSite in list:
426 >        for listSite in listSites:
427 >            for oneSite in listSite:
428                  allSites.append(oneSite)
429 <        allSites = self.uniquelist(allSites)
429 >        [allSites.append(it) for it in allSites if not allSites.count(it)]
430  
431 <        common.logger.message("Sites ("+str(len(allSites))+") hosting part/all of dataset: "+str(allSites))
432 <        common.logger.debug(6, "List of Sites: "+str(allSites))
433 <        return sites
434 <    
435 <    def jobSplittingByBlocks(self, blockSites):
318 <        """
319 <        Perform job splitting. Jobs run over an integer number of files
320 <        and no more than one block.
321 <        ARGUMENT: blockSites: dictionary with blocks as keys and list of host sites as values
322 <        REQUIRES: self.selectTotalNumberEvents, self.selectEventsPerJob, self.selectNumberofJobs,
323 <                  self.total_number_of_events, self.eventsPerJob, self.theNumberOfJobs,
324 <                  self.maxEvents, self.filesbyblock
325 <        SETS: self.jobDestination - Site destination(s) for each job (a list of lists)
326 <              self.total_number_of_jobs - Total # of jobs
327 <              self.list_of_args - File(s) job will run on (a list of lists)
328 <        """
329 <
330 <        # ---- Handle the possible job splitting configurations ---- #
331 <        if (self.selectTotalNumberEvents):
332 <            totalEventsRequested = self.total_number_of_events
333 <        if (self.selectEventsPerJob):
334 <            eventsPerJobRequested = self.eventsPerJob
335 <            if (self.selectNumberOfJobs):
336 <                totalEventsRequested = self.theNumberOfJobs * self.eventsPerJob
337 <
338 <        # If user requested all the events in the dataset
339 <        if (totalEventsRequested == -1):
340 <            eventsRemaining=self.maxEvents
341 <        # If user requested more events than are in the dataset
342 <        elif (totalEventsRequested > self.maxEvents):
343 <            eventsRemaining = self.maxEvents
344 <            common.logger.message("Requested "+str(self.total_number_of_events)+ " events, but only "+str(self.maxEvents)+" events are available.")
345 <        # If user requested less events than are in the dataset
431 >
432 >        # screen output
433 >        if self.ads or self.lumiMask:
434 >            common.logger.info("Requested (A)DS %s has %s block(s)." %
435 >                               (datasetPath, len(self.filesbyblock.keys())))
436          else:
437 <            eventsRemaining = totalEventsRequested
437 >            common.logger.info("Requested dataset: " + datasetPath + \
438 >                " has " + str(self.maxEvents) + " events in " + \
439 >                str(len(self.filesbyblock.keys())) + " blocks.\n")
440  
441 <        # If user requested more events per job than are in the dataset
350 <        if (self.selectEventsPerJob and eventsPerJobRequested > self.maxEvents):
351 <            eventsPerJobRequested = self.maxEvents
352 <
353 <        # For user info at end
354 <        totalEventCount = 0
355 <
356 <        if (self.selectTotalNumberEvents and self.selectNumberOfJobs):
357 <            eventsPerJobRequested = int(eventsRemaining/self.theNumberOfJobs)
358 <
359 <        if (self.selectNumberOfJobs):
360 <            common.logger.message("May not create the exact number_of_jobs requested.")
361 <
362 <        if ( self.ncjobs == 'all' ) :
363 <            totalNumberOfJobs = 999999999
364 <        else :
365 <            totalNumberOfJobs = self.ncjobs
366 <            
367 <
368 <        blocks = blockSites.keys()
369 <        blockCount = 0
370 <        # Backup variable in case self.maxEvents counted events in a non-included block
371 <        numBlocksInDataset = len(blocks)
372 <
373 <        jobCount = 0
374 <        list_of_lists = []
375 <
376 <        # ---- Iterate over the blocks in the dataset until ---- #
377 <        # ---- we've met the requested total # of events    ---- #
378 <        while ( (eventsRemaining > 0) and (blockCount < numBlocksInDataset) and (jobCount < totalNumberOfJobs)):
379 <            block = blocks[blockCount]
380 <            blockCount += 1
381 <            
382 <
383 <            numEventsInBlock = self.eventsbyblock[block]
384 <            common.logger.debug(5,'Events in Block File '+str(numEventsInBlock))
385 <            
386 <            files = self.filesbyblock[block]
387 <            numFilesInBlock = len(files)
388 <            if (numFilesInBlock <= 0):
389 <                continue
390 <            fileCount = 0
391 <
392 <            # ---- New block => New job ---- #
393 <            parString = "\\{"
394 <            # counter for number of events in files currently worked on
395 <            filesEventCount = 0
396 <            # flag if next while loop should touch new file
397 <            newFile = 1
398 <            # job event counter
399 <            jobSkipEventCount = 0
400 <            
401 <            # ---- Iterate over the files in the block until we've met the requested ---- #
402 <            # ---- total # of events or we've gone over all the files in this block  ---- #
403 <            while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
404 <                file = files[fileCount]
405 <                if newFile :
406 <                    try:
407 <                        numEventsInFile = self.eventsbyfile[file]
408 <                        common.logger.debug(6, "File "+str(file)+" has "+str(numEventsInFile)+" events")
409 <                        # increase filesEventCount
410 <                        filesEventCount += numEventsInFile
411 <                        # Add file to current job
412 <                        parString += '\\\"' + file + '\\\"\,'
413 <                        newFile = 0
414 <                    except KeyError:
415 <                        common.logger.message("File "+str(file)+" has unknown number of events: skipping")
416 <                        
417 <
418 <                # if less events in file remain than eventsPerJobRequested
419 <                if ( filesEventCount - jobSkipEventCount < eventsPerJobRequested ) :
420 <                    # if last file in block
421 <                    if ( fileCount == numFilesInBlock-1 ) :
422 <                        # end job using last file, use remaining events in block
423 <                        # close job and touch new file
424 <                        fullString = parString[:-2]
425 <                        fullString += '\\}'
426 <                        list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
427 <                        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
428 <                        self.jobDestination.append(blockSites[block])
429 <                        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
430 <                        # reset counter
431 <                        jobCount = jobCount + 1
432 <                        totalEventCount = totalEventCount + filesEventCount - jobSkipEventCount
433 <                        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
434 <                        jobSkipEventCount = 0
435 <                        # reset file
436 <                        parString = "\\{"
437 <                        filesEventCount = 0
438 <                        newFile = 1
439 <                        fileCount += 1
440 <                    else :
441 <                        # go to next file
442 <                        newFile = 1
443 <                        fileCount += 1
444 <                # if events in file equal to eventsPerJobRequested
445 <                elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
446 <                    # close job and touch new file
447 <                    fullString = parString[:-2]
448 <                    fullString += '\\}'
449 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
450 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
451 <                    self.jobDestination.append(blockSites[block])
452 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
453 <                    # reset counter
454 <                    jobCount = jobCount + 1
455 <                    totalEventCount = totalEventCount + eventsPerJobRequested
456 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
457 <                    jobSkipEventCount = 0
458 <                    # reset file
459 <                    parString = "\\{"
460 <                    filesEventCount = 0
461 <                    newFile = 1
462 <                    fileCount += 1
463 <                    
464 <                # if more events in file remain than eventsPerJobRequested
465 <                else :
466 <                    # close job but don't touch new file
467 <                    fullString = parString[:-2]
468 <                    fullString += '\\}'
469 <                    list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
470 <                    common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
471 <                    self.jobDestination.append(blockSites[block])
472 <                    common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))
473 <                    # increase counter
474 <                    jobCount = jobCount + 1
475 <                    totalEventCount = totalEventCount + eventsPerJobRequested
476 <                    eventsRemaining = eventsRemaining - eventsPerJobRequested
477 <                    # calculate skip events for last file
478 <                    # use filesEventCount (contains several files), jobSkipEventCount and eventsPerJobRequest
479 <                    jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
480 <                    # remove all but the last file
481 <                    filesEventCount = self.eventsbyfile[file]
482 <                    parString = "\\{"
483 <                    parString += '\\\"' + file + '\\\"\,'
484 <                pass # END if
485 <            pass # END while (iterate over files in the block)
486 <        pass # END while (iterate over blocks in the dataset)
487 <        self.ncjobs = self.total_number_of_jobs = jobCount
488 <        if (eventsRemaining > 0 and jobCount < totalNumberOfJobs ):
489 <            common.logger.message("Could not run on all requested events because some blocks not hosted at allowed sites.")
490 <        common.logger.message("\n"+str(jobCount)+" job(s) can run on "+str(totalEventCount)+" events.\n")
491 <        
492 <        self.list_of_args = list_of_lists
493 <        return
441 >        return sites
442  
495    def jobSplittingNoInput(self):
496        """
497        Perform job splitting based on number of event per job
498        """
499        common.logger.debug(5,'Splitting per events')
500        common.logger.message('Required '+str(self.eventsPerJob)+' events per job ')
501        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
502        common.logger.message('Required '+str(self.total_number_of_events)+' events in total ')
443  
444 <        if (self.total_number_of_events < 0):
505 <            msg='Cannot split jobs per Events with "-1" as total number of events'
506 <            raise CrabException(msg)
444 >    def split(self, jobParams,firstJobID):
445  
446 <        if (self.selectEventsPerJob):
447 <            self.total_number_of_jobs = int(self.total_number_of_events/self.eventsPerJob)
448 <        elif (self.selectNumberOfJobs) :
449 <            self.total_number_of_jobs = self.theNumberOfJobs
450 <            self.eventsPerJob = int(self.total_number_of_events/self.total_number_of_jobs)
451 <
452 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
453 <
516 <        # is there any remainder?
517 <        check = int(self.total_number_of_events) - (int(self.total_number_of_jobs)*self.eventsPerJob)
518 <
519 <        common.logger.debug(5,'Check  '+str(check))
520 <
521 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created, each for '+str(self.eventsPerJob)+' for a total of '+str(self.total_number_of_jobs*self.eventsPerJob)+' events')
522 <        if check > 0:
523 <            common.logger.message('Warning: asked '+str(self.total_number_of_events)+' but can do only '+str(int(self.total_number_of_jobs)*self.eventsPerJob))
524 <
525 <        # argument is seed number.$i
526 <        self.list_of_args = []
527 <        for i in range(self.total_number_of_jobs):
528 <            ## Since there is no input, any site is good
529 <           # self.jobDestination.append(["Any"])
530 <            self.jobDestination.append([""]) #must be empty to write correctly the xml
531 <            if (self.sourceSeed):
532 <                if (self.sourceSeedVtx):
533 <                    ## pythia + vtx random seed
534 <                    self.list_of_args.append([
535 <                                              str(self.sourceSeed)+str(i),
536 <                                              str(self.sourceSeedVtx)+str(i)
537 <                                              ])
538 <                else:
539 <                    ## only pythia random seed
540 <                    self.list_of_args.append([(str(self.sourceSeed)+str(i))])
541 <            else:
542 <                ## no random seed
543 <                self.list_of_args.append([str(i)])
544 <        #print self.list_of_args
446 >        jobParams = self.dict['args']
447 >        njobs = self.dict['njobs']
448 >        self.jobDestination = self.dict['jobDestination']
449 >
450 >        if njobs == 0:
451 >            raise CrabException("Asked to split zero jobs: aborting")
452 >        if not self.server and not self.local and njobs > 500:
453 >            raise CrabException("The CRAB client will not submit more than 500 jobs. You must use the server mode.")
454  
455 <        return
455 >        # create the empty structure
456 >        for i in range(njobs):
457 >            jobParams.append("")
458  
459 +        listID=[]
460 +        listField=[]
461 +        listDictions=[]
462 +        exist= os.path.exists(self.argsFile)
463 +        for id in range(njobs):
464 +            job = id + int(firstJobID)
465 +            listID.append(job+1)
466 +            job_ToSave ={}
467 +            concString = ' '
468 +            argu=''
469 +            str_argu = str(job+1)
470 +            if len(jobParams[id]):
471 +                argu = {'JobID': job+1}
472 +                for i in range(len(jobParams[id])):
473 +                    argu[self.dict['params'][i]]=jobParams[id][i]
474 +                    if len(jobParams[id])==1: self.NumEvents = jobParams[id][i]
475 +                # just for debug
476 +                str_argu += concString.join(jobParams[id])
477 +            if argu != '': listDictions.append(argu)
478 +            job_ToSave['arguments']= '%d %d'%( (job+1), 0)
479 +            job_ToSave['dlsDestination']= self.jobDestination[id]
480 +            listField.append(job_ToSave)
481 +            from ProdCommon.SiteDB.CmsSiteMapper import CmsSEMap
482 +            cms_se = CmsSEMap()
483 +            msg="Job  %s  Arguments:  %s\n"%(str(job+1),str_argu)
484 +            msg+="\t  Destination: %s "%(str(self.jobDestination[id]))
485 +            SEDestination = [cms_se[dest] for dest in self.jobDestination[id]]
486 +            msg+="\t  CMSDestination: %s "%(str(SEDestination))
487 +            common.logger.log(10-1,msg)
488 +        # write xml
489 +        if len(listDictions):
490 +            if exist==False: self.CreateXML()
491 +            self.addEntry(listDictions)
492 +        common._db.updateJob_(listID,listField)
493 +        return
494  
495 <    def jobSplittingForScript(self):#CarlosDaniele
495 >    def CreateXML(self):
496          """
551        Perform job splitting based on number of job
497          """
498 <        common.logger.debug(5,'Splitting per job')
499 <        common.logger.message('Required '+str(self.theNumberOfJobs)+' jobs in total ')
555 <
556 <        self.total_number_of_jobs = self.theNumberOfJobs
557 <
558 <        common.logger.debug(5,'N jobs  '+str(self.total_number_of_jobs))
559 <
560 <        common.logger.message(str(self.total_number_of_jobs)+' jobs can be created')
561 <
562 <        # argument is seed number.$i
563 <        self.list_of_args = []
564 <        for i in range(self.total_number_of_jobs):
565 <            ## Since there is no input, any site is good
566 <           # self.jobDestination.append(["Any"])
567 <            self.jobDestination.append([""])
568 <            ## no random seed
569 <            self.list_of_args.append([str(i)])
498 >        result = IMProvNode( self.rootArgsFilename )
499 >        outfile = file( self.argsFile, 'w').write(str(result))
500          return
501  
502 <    def split(self, jobParams):
503 <
504 <        common.jobDB.load()
575 <        #### Fabio
576 <        njobs = self.total_number_of_jobs
577 <        arglist = self.list_of_args
578 <        # create the empty structure
579 <        for i in range(njobs):
580 <            jobParams.append("")
581 <        
582 <        for job in range(njobs):
583 <            jobParams[job] = arglist[job]
584 <            # print str(arglist[job])
585 <            # print jobParams[job]
586 <            common.jobDB.setArguments(job, jobParams[job])
587 <            common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
588 <            common.jobDB.setDestination(job, self.jobDestination[job])
502 >    def addEntry(self, listDictions):
503 >        """
504 >        _addEntry_
505  
506 <        common.jobDB.save()
506 >        add an entry to the xml file
507 >        """
508 >        ## load xml
509 >        improvDoc = loadIMProvFile(self.argsFile)
510 >        entrname= 'Job'
511 >        for dictions in listDictions:
512 >           report = IMProvNode(entrname , None, **dictions)
513 >           improvDoc.addNode(report)
514 >        outfile = file( self.argsFile, 'w').write(str(improvDoc))
515          return
516 <    
593 <    def getJobTypeArguments(self, nj, sched):
594 <        result = ''
595 <        for i in common.jobDB.arguments(nj):
596 <            result=result+str(i)+" "
597 <        return result
598 <  
516 >
517      def numberOfJobs(self):
518 <        # Fabio
519 <        return self.total_number_of_jobs
518 > #wmbs
519 >        if self.automation==0:
520 >           return self.dict['njobs']
521 >        else:
522 >           return None
523  
524      def getTarBall(self, exe):
525          """
526          Return the TarBall with lib and exe
527          """
528 <        
608 <        # if it exist, just return it
609 <        self.tgzNameWithPath = common.work_space.shareDir()+self.tgz_name
528 >        self.tgzNameWithPath = common.work_space.pathForTgz()+self.tgz_name
529          if os.path.exists(self.tgzNameWithPath):
530              return self.tgzNameWithPath
531  
# Line 619 | Line 538 | class Cmssw(JobType):
538  
539          # First of all declare the user Scram area
540          swArea = self.scram.getSWArea_()
622        #print "swArea = ", swArea
623        swVersion = self.scram.getSWVersion()
624        #print "swVersion = ", swVersion
541          swReleaseTop = self.scram.getReleaseTop_()
542 <        #print "swReleaseTop = ", swReleaseTop
627 <        
542 >
543          ## check if working area is release top
544          if swReleaseTop == '' or swArea == swReleaseTop:
545 +            common.logger.debug("swArea = "+swArea+" swReleaseTop ="+swReleaseTop)
546              return
547  
548 <        filesToBeTarred = []
549 <        ## First find the executable
550 <        if (self.executable != ''):
551 <            exeWithPath = self.scram.findFile_(executable)
552 < #           print exeWithPath
553 <            if ( not exeWithPath ):
554 <                raise CrabException('User executable '+executable+' not found')
555 <
556 <            ## then check if it's private or not
557 <            if exeWithPath.find(swReleaseTop) == -1:
558 <                # the exe is private, so we must ship
559 <                common.logger.debug(5,"Exe "+exeWithPath+" to be tarred")
560 <                path = swArea+'/'
561 <                exe = string.replace(exeWithPath, path,'')
562 <                filesToBeTarred.append(exe)
548 >        import tarfile
549 >        try: # create tar ball
550 >            tar = tarfile.open(self.tgzNameWithPath, "w:gz")
551 >            ## First find the executable
552 >            if (self.executable != ''):
553 >                exeWithPath = self.scram.findFile_(executable)
554 >                if ( not exeWithPath ):
555 >                    raise CrabException('User executable '+executable+' not found')
556 >
557 >                ## then check if it's private or not
558 >                if exeWithPath.find(swReleaseTop) == -1:
559 >                    # the exe is private, so we must ship
560 >                    common.logger.debug("Exe "+exeWithPath+" to be tarred")
561 >                    path = swArea+'/'
562 >                    # distinguish case when script is in user project area or given by full path somewhere else
563 >                    if exeWithPath.find(path) >= 0 :
564 >                        exe = string.replace(exeWithPath, path,'')
565 >                        tar.add(path+exe,exe)
566 >                    else :
567 >                        tar.add(exeWithPath,os.path.basename(executable))
568 >                    pass
569 >                else:
570 >                    # the exe is from release, we'll find it on WN
571 >                    pass
572 >
573 >            ## Now get the libraries: only those in local working area
574 >            tar.dereference=True
575 >            libDir = 'lib'
576 >            lib = swArea+'/' +libDir
577 >            common.logger.debug("lib "+lib+" to be tarred")
578 >            if os.path.exists(lib):
579 >                tar.add(lib,libDir)
580 >
581 >            ## Now check if module dir is present
582 >            moduleDir = 'module'
583 >            module = swArea + '/' + moduleDir
584 >            if os.path.isdir(module):
585 >                tar.add(module,moduleDir)
586 >            tar.dereference=False
587 >
588 >            ## Now check if any data dir(s) is present
589 >            self.dataExist = False
590 >            todo_list = [(i, i) for i in  os.listdir(swArea+"/src")]
591 >            while len(todo_list):
592 >                entry, name = todo_list.pop()
593 >                if name.startswith('crab_0_') or  name.startswith('.') or name == 'CVS':
594 >                    continue
595 >                if os.path.isdir(swArea+"/src/"+entry):
596 >                    entryPath = entry + '/'
597 >                    todo_list += [(entryPath + i, i) for i in  os.listdir(swArea+"/src/"+entry)]
598 >                    if name == 'data':
599 >                        self.dataExist=True
600 >                        common.logger.debug("data "+entry+" to be tarred")
601 >                        tar.add(swArea+"/src/"+entry,"src/"+entry)
602 >                    pass
603                  pass
604 <            else:
605 <                # the exe is from release, we'll find it on WN
604 >
605 >            ### CMSSW ParameterSet
606 >            if not self.pset is None:
607 >                cfg_file = common.work_space.jobDir()+self.configFilename()
608 >                pickleFile = common.work_space.jobDir()+self.configFilename() + '.pkl'
609 >                tar.add(cfg_file,self.configFilename())
610 >                tar.add(pickleFile,self.configFilename() + '.pkl')
611 >
612 >            try:
613 >                crab_cfg_file = common.work_space.shareDir()+'/crab.cfg'
614 >                tar.add(crab_cfg_file,'crab.cfg')
615 >            except:
616                  pass
617 <
618 <        ## Now get the libraries: only those in local working area
619 <        libDir = 'lib'
620 <        lib = swArea+'/' +libDir
621 <        common.logger.debug(5,"lib "+lib+" to be tarred")
622 <        if os.path.exists(lib):
623 <            filesToBeTarred.append(libDir)
624 <
625 <        ## Now check if module dir is present
626 <        moduleDir = 'module'
627 <        if os.path.isdir(swArea+'/'+moduleDir):
628 <            filesToBeTarred.append(moduleDir)
629 <
630 <        ## Now check if the Data dir is present
631 <        dataDir = 'src/Data/'
632 <        if os.path.isdir(swArea+'/'+dataDir):
633 <            filesToBeTarred.append(dataDir)
634 <
635 <        ## copy ProdAgent dir to swArea
636 <        cmd = '\cp -rf ' + os.environ['CRABDIR'] + '/ProdAgentApi ' + swArea
637 <        cmd_out = runCommand(cmd)
638 <        if cmd_out != '':
639 <            common.logger.message('ProdAgentApi directory could not be copied to local CMSSW project directory.')
640 <            common.logger.message('No FrameworkJobreport parsing is possible on the WorkerNode.')
641 <
642 <        ## Now check if the Data dir is present
643 <        paDir = 'ProdAgentApi'
644 <        if os.path.isdir(swArea+'/'+paDir):
645 <            filesToBeTarred.append(paDir)
646 <
647 <        ## Create the tar-ball
648 <        if len(filesToBeTarred)>0:
649 <            cwd = os.getcwd()
650 <            os.chdir(swArea)
651 <            tarcmd = 'tar zcvf ' + self.tgzNameWithPath + ' '
652 <            for line in filesToBeTarred:
653 <                tarcmd = tarcmd + line + ' '
654 <            cout = runCommand(tarcmd)
655 <            if not cout:
656 <                raise CrabException('Could not create tar-ball')
657 <            os.chdir(cwd)
658 <        else:
659 <            common.logger.debug(5,"No files to be to be tarred")
660 <        
661 <        return
662 <        
663 <    def wsSetupEnvironment(self, nj):
617 >
618 >            ## Add ProdCommon dir to tar
619 >            prodcommonDir = './'
620 >            prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
621 >            neededStuff = ['ProdCommon/__init__.py','ProdCommon/FwkJobRep', 'ProdCommon/CMSConfigTools', \
622 >                           'ProdCommon/Core', 'ProdCommon/MCPayloads', 'IMProv', 'ProdCommon/Storage', \
623 >                           'WMCore/__init__.py','WMCore/Algorithms']
624 >            for file in neededStuff:
625 >                tar.add(prodcommonPath+file,prodcommonDir+file)
626 >
627 >            ##### ML stuff
628 >            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
629 >            path=os.environ['CRABDIR'] + '/python/'
630 >            for file in ML_file_list:
631 >                tar.add(path+file,file)
632 >
633 >            ##### Utils
634 >            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py','cmscp.py']
635 >            for file in Utils_file_list:
636 >                tar.add(path+file,file)
637 >
638 >            ##### AdditionalFiles
639 >            tar.dereference=True
640 >            for file in self.additional_inbox_files:
641 >                tar.add(file,string.split(file,'/')[-1])
642 >            tar.dereference=False
643 >            common.logger.log(10-1,"Files in "+self.tgzNameWithPath+" : "+str(tar.getnames()))
644 >
645 >            tar.close()
646 >        except IOError, exc:
647 >            msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
648 >            msg += str(exc)
649 >            raise CrabException(msg)
650 >        except tarfile.TarError, exc:
651 >            msg = 'Could not create tar-ball %s \n'%self.tgzNameWithPath
652 >            msg += str(exc)
653 >            raise CrabException(msg)
654 >
655 >        tarballinfo = os.stat(self.tgzNameWithPath)
656 >        if ( tarballinfo.st_size > self.MaxTarBallSize*1024*1024 ) :
657 >            if not self.server:
658 >                msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + \
659 >                         str(self.MaxTarBallSize) +'MB input sandbox limit \n'
660 >                msg += '      and not supported by the direct GRID submission system.\n'
661 >                msg += '      Please use the CRAB server mode by setting server_name=<NAME> in section [CRAB] of your crab.cfg.\n'
662 >                msg += '      For further infos please see https://twiki.cern.ch/twiki/bin/view/CMS/SWGuideCrabServerForUsers#Server_available_for_users'
663 >            else:
664 >                msg  = 'Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' +  \
665 >                        str(self.MaxTarBallSize) +'MB input sandbox limit in the server.'
666 >            raise CrabException(msg)
667 >
668 >        ## create tar-ball with ML stuff
669 >
670 >    def wsSetupEnvironment(self, nj=0):
671          """
672          Returns part of a job script which prepares
673          the execution environment for the job 'nj'.
674          """
675 <        # Prepare JobType-independent part
703 <        txt = ''
704 <  
705 <        ## OLI_Daniele at this level  middleware already known
675 >        psetName = 'pset.py'
676  
677 <        txt += 'if [ $middleware == LCG ]; then \n'
677 >        # Prepare JobType-independent part
678 >        txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
679 >        txt += 'echo ">>> setup environment"\n'
680 >        txt += 'echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
681 >        txt += 'export SCRAM_ARCH=' + self.executable_arch + '\n'
682 >        txt += 'echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
683 >        txt += 'if [ $middleware == LCG ] || [ $middleware == CAF ] || [ $middleware == LSF ]; then \n'
684          txt += self.wsSetupCMSLCGEnvironment_()
685          txt += 'elif [ $middleware == OSG ]; then\n'
686          txt += '    WORKING_DIR=`/bin/mktemp  -d $OSG_WN_TMP/cms_XXXXXXXXXXXX`\n'
687 <        txt += '    echo "Created working directory: $WORKING_DIR"\n'
688 <        txt += '    if [ ! -d $WORKING_DIR ] ;then\n'
689 <        txt += '        echo "SET_CMS_ENV 10016 ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
690 <        txt += '        echo "JOB_EXIT_STATUS = 10016"\n'
715 <        txt += '        echo "JobExitCode=10016" | tee -a $RUNTIME_AREA/$repo\n'
716 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
717 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
718 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
719 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
720 <        txt += '        exit 1\n'
687 >        txt += '    if [ ! $? == 0 ] ;then\n'
688 >        txt += '        echo "ERROR ==> OSG $WORKING_DIR could not be created on WN `hostname`"\n'
689 >        txt += '        job_exit_code=10016\n'
690 >        txt += '        func_exit\n'
691          txt += '    fi\n'
692 +        txt += '    echo ">>> Created working directory: $WORKING_DIR"\n'
693          txt += '\n'
694          txt += '    echo "Change to working directory: $WORKING_DIR"\n'
695          txt += '    cd $WORKING_DIR\n'
696 <        txt += self.wsSetupCMSOSGEnvironment_()
696 >        txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
697 >        txt += self.wsSetupCMSOSGEnvironment_()
698 >        #Setup SGE Environment
699 >        txt += 'elif [ $middleware == SGE ]; then\n'
700 >        txt += self.wsSetupCMSLCGEnvironment_()
701 >
702 >        txt += 'elif [ $middleware == ARC ]; then\n'
703 >        txt += self.wsSetupCMSLCGEnvironment_()
704 >
705 >        #Setup PBS Environment
706 >        txt += 'elif [ $middleware == PBS ]; then\n'
707 >        txt += self.wsSetupCMSLCGEnvironment_()
708 >
709          txt += 'fi\n'
710  
711          # Prepare JobType-specific part
712          scram = self.scram.commandName()
713          txt += '\n\n'
714 <        txt += 'echo "### SPECIFIC JOB SETUP ENVIRONMENT ###"\n'
714 >        txt += 'echo ">>> specific cmssw setup environment:"\n'
715 >        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
716          txt += scram+' project CMSSW '+self.version+'\n'
717          txt += 'status=$?\n'
718          txt += 'if [ $status != 0 ] ; then\n'
719 <        txt += '   echo "SET_EXE_ENV 10034 ==>ERROR CMSSW '+self.version+' not found on `hostname`" \n'
720 <        txt += '   echo "JOB_EXIT_STATUS = 10034"\n'
721 <        txt += '   echo "JobExitCode=10034" | tee -a $RUNTIME_AREA/$repo\n'
738 <        txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
739 <        txt += '   rm -f $RUNTIME_AREA/$repo \n'
740 <        txt += '   echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
741 <        txt += '   echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
742 <        ## OLI_Daniele
743 <        txt += '    if [ $middleware == OSG ]; then \n'
744 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
745 <        txt += '        cd $RUNTIME_AREA\n'
746 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
747 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
748 <        txt += '            echo "SET_CMS_ENV 10018 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after CMSSW CMSSW_0_6_1 not found on `hostname`"\n'
749 <        txt += '            echo "JOB_EXIT_STATUS = 10018"\n'
750 <        txt += '            echo "JobExitCode=10018" | tee -a $RUNTIME_AREA/$repo\n'
751 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
752 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
753 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
754 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
755 <        txt += '        fi\n'
756 <        txt += '    fi \n'
757 <        txt += '   exit 1 \n'
719 >        txt += '    echo "ERROR ==> CMSSW '+self.version+' not found on `hostname`" \n'
720 >        txt += '    job_exit_code=10034\n'
721 >        txt += '    func_exit\n'
722          txt += 'fi \n'
759        txt += 'echo "CMSSW_VERSION =  '+self.version+'"\n'
723          txt += 'cd '+self.version+'\n'
724 <        ### needed grep for bug in scramv1 ###
724 >        txt += 'SOFTWARE_DIR=`pwd`; export SOFTWARE_DIR\n'
725 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
726          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
727 <
727 >        txt += 'if [ $? != 0 ] ; then\n'
728 >        txt += '    echo "ERROR ==> Problem with the command: "\n'
729 >        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
730 >        txt += '    job_exit_code=10034\n'
731 >        txt += '    func_exit\n'
732 >        txt += 'fi \n'
733          # Handle the arguments:
734          txt += "\n"
735 <        txt += "## number of arguments (first argument always jobnumber)\n"
735 >        txt += "## number of arguments (first argument always jobnumber, the second is the resubmission number)\n"
736          txt += "\n"
737 < #        txt += "narg=$#\n"
769 <        txt += "if [ $nargs -lt 2 ]\n"
737 >        txt += "if [ $nargs -lt "+str(self.argsList)+" ]\n"
738          txt += "then\n"
739 <        txt += "    echo 'SET_EXE_ENV 1 ==> ERROR Too few arguments' +$nargs+ \n"
740 <        txt += '    echo "JOB_EXIT_STATUS = 50113"\n'
741 <        txt += '    echo "JobExitCode=50113" | tee -a $RUNTIME_AREA/$repo\n'
774 <        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
775 <        txt += '    rm -f $RUNTIME_AREA/$repo \n'
776 <        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
777 <        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
778 <        ## OLI_Daniele
779 <        txt += '    if [ $middleware == OSG ]; then \n'
780 <        txt += '        echo "Remove working directory: $WORKING_DIR"\n'
781 <        txt += '        cd $RUNTIME_AREA\n'
782 <        txt += '        /bin/rm -rf $WORKING_DIR\n'
783 <        txt += '        if [ -d $WORKING_DIR ] ;then\n'
784 <        txt += '            echo "SET_EXE_ENV 50114 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Too few arguments for CRAB job wrapper"\n'
785 <        txt += '            echo "JOB_EXIT_STATUS = 50114"\n'
786 <        txt += '            echo "JobExitCode=50114" | tee -a $RUNTIME_AREA/$repo\n'
787 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
788 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
789 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
790 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
791 <        txt += '        fi\n'
792 <        txt += '    fi \n'
793 <        txt += "    exit 1\n"
739 >        txt += "    echo 'ERROR ==> Too few arguments' +$nargs+ \n"
740 >        txt += '    job_exit_code=50113\n'
741 >        txt += "    func_exit\n"
742          txt += "fi\n"
743          txt += "\n"
744  
745          # Prepare job-specific part
746          job = common.job_list[nj]
747 <        if self.pset != None: #CarlosDaniele
800 <            pset = os.path.basename(job.configFilename())
747 >        if (self.datasetPath):
748              txt += '\n'
749 <            if (self.datasetPath): # standard job
803 <                #txt += 'InputFiles=$2\n'
804 <                txt += 'InputFiles=${args[1]}\n'
805 <                txt += 'MaxEvents=${args[2]}\n'
806 <                txt += 'SkipEvents=${args[3]}\n'
807 <                txt += 'echo "Inputfiles:<$InputFiles>"\n'
808 <                txt += 'sed "s#{\'INPUT\'}#$InputFiles#" $RUNTIME_AREA/'+pset+' > pset_tmp_1.cfg\n'
809 <                txt += 'echo "MaxEvents:<$MaxEvents>"\n'
810 <                txt += 'sed "s#INPUTMAXEVENTS#$MaxEvents#" pset_tmp_1.cfg > pset_tmp_2.cfg\n'
811 <                txt += 'echo "SkipEvents:<$SkipEvents>"\n'
812 <                txt += 'sed "s#INPUTSKIPEVENTS#$SkipEvents#" pset_tmp_2.cfg > pset.cfg\n'
813 <            else:  # pythia like job
814 <                if (self.sourceSeed):
815 < #                    txt += 'Seed=$2\n'
816 <                    txt += 'Seed=${args[1]}\n'
817 <                    txt += 'echo "Seed: <$Seed>"\n'
818 <                    txt += 'sed "s#\<INPUT\>#$Seed#" $RUNTIME_AREA/'+pset+' > tmp.cfg\n'
819 <                    if (self.sourceSeedVtx):
820 < #                        txt += 'VtxSeed=$3\n'
821 <                        txt += 'VtxSeed=${args[2]}\n'
822 <                        txt += 'echo "VtxSeed: <$VtxSeed>"\n'
823 <                        txt += 'sed "s#INPUTVTX#$VtxSeed#" tmp.cfg > pset.cfg\n'
824 <                    else:
825 <                        txt += 'mv tmp.cfg pset.cfg\n'
826 <                else:
827 <                    txt += '# Copy untouched pset\n'
828 <                    txt += 'cp $RUNTIME_AREA/'+pset+' pset.cfg\n'
749 >            txt += 'DatasetPath='+self.datasetPath+'\n'
750  
751 +            txt += 'PrimaryDataset='+self.primaryDataset +'\n'
752 +            txt += 'DataTier='+self.dataTier+'\n'
753 +            txt += 'ApplicationFamily=cmsRun\n'
754  
755 <        if len(self.additional_inbox_files) > 0:
756 <            for file in self.additional_inbox_files:
757 <                relFile = file.split("/")[-1]
758 <                txt += 'if [ -e $RUNTIME_AREA/'+relFile+' ] ; then\n'
759 <                txt += '   cp $RUNTIME_AREA/'+relFile+' .\n'
760 <                txt += '   chmod +x '+relFile+'\n'
761 <                txt += 'fi\n'
762 <            pass
839 <
840 <        if self.pset != None: #CarlosDaniele
841 <            txt += 'echo "### END JOB SETUP ENVIRONMENT ###"\n\n'
842 <        
755 >        else:
756 >            txt += 'DatasetPath=MCDataTier\n'
757 >            txt += 'PrimaryDataset=null\n'
758 >            txt += 'DataTier=null\n'
759 >            txt += 'ApplicationFamily=MCDataTier\n'
760 >        if self.pset != None:
761 >            pset = os.path.basename(job.configFilename())
762 >            pkl  = os.path.basename(job.configFilename()) + '.pkl'
763              txt += '\n'
764 <            txt += 'echo "***** cat pset.cfg *********"\n'
765 <            txt += 'cat pset.cfg\n'
766 <            txt += 'echo "****** end pset.cfg ********"\n'
764 >            txt += 'cp  $RUNTIME_AREA/'+pset+' .\n'
765 >            txt += 'cp  $RUNTIME_AREA/'+pkl+' .\n'
766 >
767 >            txt += 'PreserveSeeds='  + ','.join(self.preserveSeeds)  + '; export PreserveSeeds\n'
768 >            txt += 'IncrementSeeds=' + ','.join(self.incrementSeeds) + '; export IncrementSeeds\n'
769 >            txt += 'echo "PreserveSeeds: <$PreserveSeeds>"\n'
770 >            txt += 'echo "IncrementSeeds:<$IncrementSeeds>"\n'
771 >
772 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
773 >            if self.var_filter:
774 >                #print "self.var_filter = ",self.var_filter
775 >                txt += "export var_filter="+"'"+self.var_filter+"'\n"
776 >                txt += 'echo $var_filter'
777 >        else:
778              txt += '\n'
779 <            # txt += 'echo "***** cat pset1.cfg *********"\n'
780 <            # txt += 'cat pset1.cfg\n'
850 <            # txt += 'echo "****** end pset1.cfg ********"\n'
779 >            if self.AdditionalArgs: txt += 'export AdditionalArgs=\"%s\"\n'%(self.AdditionalArgs)
780 >            if int(self.NumEvents) != 0: txt += 'export MaxEvents=%s\n'%str(self.NumEvents)
781          return txt
782  
783 <    def wsBuildExe(self, nj):
783 >    def wsUntarSoftware(self, nj=0):
784          """
785          Put in the script the commands to build an executable
786          or a library.
787          """
788  
789 <        txt = ""
789 >        txt = '\n#Written by cms_cmssw::wsUntarSoftware\n'
790  
791          if os.path.isfile(self.tgzNameWithPath):
792 <            txt += 'echo "tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'"\n'
793 <            txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
792 >            txt += 'echo ">>> tar xzf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
793 >            if  self.debug_wrapper==1 :
794 >                txt += 'tar zxvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
795 >                txt += 'ls -Al \n'
796 >            else:
797 >                txt += 'tar zxf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
798              txt += 'untar_status=$? \n'
799              txt += 'if [ $untar_status -ne 0 ]; then \n'
800 <            txt += '   echo "SET_EXE 1 ==> ERROR Untarring .tgz file failed"\n'
801 <            txt += '   echo "JOB_EXIT_STATUS = $untar_status" \n'
802 <            txt += '   echo "JobExitCode=$untar_status" | tee -a $RUNTIME_AREA/$repo\n'
869 <            txt += '   if [ $middleware == OSG ]; then \n'
870 <            txt += '       echo "Remove working directory: $WORKING_DIR"\n'
871 <            txt += '       cd $RUNTIME_AREA\n'
872 <            txt += '       /bin/rm -rf $WORKING_DIR\n'
873 <            txt += '       if [ -d $WORKING_DIR ] ;then\n'
874 <            txt += '           echo "SET_EXE 50999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after Untarring .tgz file failed"\n'
875 <            txt += '           echo "JOB_EXIT_STATUS = 50999"\n'
876 <            txt += '           echo "JobExitCode=50999" | tee -a $RUNTIME_AREA/$repo\n'
877 <            txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
878 <            txt += '           rm -f $RUNTIME_AREA/$repo \n'
879 <            txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
880 <            txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
881 <            txt += '       fi\n'
882 <            txt += '   fi \n'
883 <            txt += '   \n'
884 <            txt += '   exit 1 \n'
800 >            txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
801 >            txt += '   job_exit_code=$untar_status\n'
802 >            txt += '   func_exit\n'
803              txt += 'else \n'
804              txt += '   echo "Successful untar" \n'
805              txt += 'fi \n'
806              txt += '\n'
807 <            txt += 'echo "Include ProdAgentApi in PYTHONPATH"\n'
807 >            txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
808              txt += 'if [ -z "$PYTHONPATH" ]; then\n'
809 <            txt += '   export PYTHONPATH=ProdAgentApi\n'
809 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
810              txt += 'else\n'
811 <            txt += '   export PYTHONPATH=ProdAgentApi:${PYTHONPATH}\n'
811 >            txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
812 >            txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
813              txt += 'fi\n'
814              txt += '\n'
815  
816              pass
817 <        
817 >
818          return txt
819  
820 <    def modifySteeringCards(self, nj):
820 >    def wsBuildExe(self, nj=0):
821          """
822 <        modify the card provided by the user,
823 <        writing a new card into share dir
822 >        Put in the script the commands to build an executable
823 >        or a library.
824          """
825 <        
825 >
826 >        txt = '\n#Written by cms_cmssw::wsBuildExe\n'
827 >        txt += 'echo ">>> moving CMSSW software directories in `pwd`" \n'
828 >
829 >        txt += 'rm -r lib/ module/ \n'
830 >        txt += 'mv $RUNTIME_AREA/lib/ . \n'
831 >        txt += 'mv $RUNTIME_AREA/module/ . \n'
832 >        if self.dataExist == True:
833 >            txt += 'rm -r src/ \n'
834 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
835 >        if len(self.additional_inbox_files)>0:
836 >            for file in self.additional_inbox_files:
837 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
838 >
839 >        txt += 'echo ">>> Include $RUNTIME_AREA in PYTHONPATH:"\n'
840 >        txt += 'if [ -z "$PYTHONPATH" ]; then\n'
841 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/\n'
842 >        txt += 'else\n'
843 >        txt += '   export PYTHONPATH=$RUNTIME_AREA/:${PYTHONPATH}\n'
844 >        txt += 'echo "PYTHONPATH=$PYTHONPATH"\n'
845 >        txt += 'fi\n'
846 >        txt += '\n'
847 >
848 >        if self.pset != None:
849 >            psetName = 'pset.py'
850 >
851 >            txt += '\n'
852 >            if self.debug_wrapper == 1:
853 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
854 >                txt += 'cat ' + psetName + '\n'
855 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
856 >                txt += '\n'
857 >                txt += 'echo "***********************" \n'
858 >                txt += 'which edmConfigHash \n'
859 >                txt += 'echo "***********************" \n'
860 >            txt += 'edmConfigHash ' + psetName + ' \n'
861 >            txt += 'PSETHASH=`edmConfigHash ' + psetName + '` \n'
862 >            txt += 'echo "PSETHASH = $PSETHASH" \n'
863 >            #### FEDE temporary fix for noEdm files #####
864 >            txt += 'if [ -z "$PSETHASH" ]; then \n'
865 >            txt += '   export PSETHASH=null\n'
866 >            txt += 'fi \n'
867 >            #############################################
868 >            txt += '\n'
869 >        return txt
870 >
871 >
872      def executableName(self):
873 <        if self.pset == None: #CarlosDaniele
873 >        if self.scriptExe:
874              return "sh "
875          else:
876              return self.executable
877  
878      def executableArgs(self):
879 <        if self.pset == None:#CarlosDaniele
880 <            return   self.scriptExe + " $NJob"
881 <        else:
882 <            return " -p pset.cfg"
879 >        if self.scriptExe:
880 >            return self.scriptExe + " $NJob $AdditionalArgs"
881 >        else:
882 >            return " -j $RUNTIME_AREA/crab_fjr_$NJob.xml -p pset.py"
883  
884      def inputSandbox(self, nj):
885          """
886          Returns a list of filenames to be put in JDL input sandbox.
887          """
888          inp_box = []
924        # dict added to delete duplicate from input sandbox file list
925        seen = {}
926        ## code
889          if os.path.isfile(self.tgzNameWithPath):
890              inp_box.append(self.tgzNameWithPath)
891 <        ## config
892 <        if not self.pset is None: #CarlosDaniele
893 <            inp_box.append(common.job_list[nj].configFilename())
932 <        ## additional input files
933 <        #for file in self.additional_inbox_files:
934 <        #    inp_box.append(common.work_space.cwdDir()+file)
891 >        if os.path.isfile(self.argsFile):
892 >            inp_box.append(self.argsFile)
893 >        inp_box.append(common.work_space.jobDir() + self.scriptName)
894          return inp_box
895  
896      def outputSandbox(self, nj):
# Line 940 | Line 899 | class Cmssw(JobType):
899          """
900          out_box = []
901  
943        stdout=common.job_list[nj].stdout()
944        stderr=common.job_list[nj].stderr()
945
902          ## User Declared output files
903 <        for out in self.output_file:
904 <            n_out = nj + 1
905 <            out_box.append(self.numberFile_(out,str(n_out)))
903 >        for out in (self.output_file+self.output_file_sandbox):
904 >            n_out = nj + 1
905 >            out_box.append(numberFile(out,str(n_out)))
906          return out_box
951        return []
907  
953    def prepareSteeringCards(self):
954        """
955        Make initial modifications of the user's steering card file.
956        """
957        return
908  
909      def wsRenameOutput(self, nj):
910          """
911          Returns part of a job script which renames the produced files.
912          """
913  
914 <        txt = '\n'
915 <        txt += '# directory content\n'
916 <        txt += 'ls \n'
917 <        file_list = ''
918 <        for fileWithSuffix in self.output_file:
919 <            output_file_num = self.numberFile_(fileWithSuffix, '$NJob')
920 <            file_list=file_list+output_file_num+' '
914 >        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
915 >        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
916 >        txt += 'echo ">>> current directory content:"\n'
917 >        if self.debug_wrapper==1:
918 >            txt += 'ls -Al\n'
919 >        txt += '\n'
920 >
921 >        for fileWithSuffix in (self.output_file):
922 >            output_file_num = numberFile(fileWithSuffix, '$OutUniqueID')
923              txt += '\n'
924              txt += '# check output file\n'
925 <            txt += 'ls '+fileWithSuffix+'\n'
926 <            txt += 'ls_result=$?\n'
927 <            #txt += 'exe_result=$?\n'
928 <            txt += 'if [ $ls_result -ne 0 ] ; then\n'
929 <            txt += '   echo "ERROR: Problem with output file"\n'
930 <            #txt += '   echo "JOB_EXIT_STATUS = $exe_result"\n'
931 <            #txt += '   echo "JobExitCode=60302" | tee -a $RUNTIME_AREA/$repo\n'
932 <            #txt += '   dumpStatus $RUNTIME_AREA/$repo\n'
933 <            ### OLI_DANIELE
934 <            if common.scheduler.boss_scheduler_name == 'condor_g':
925 >            txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
926 >            if (self.copy_data == 1):  # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
927 >                txt += '    mv '+fileWithSuffix+' '+output_file_num+'\n'
928 >                txt += '    ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
929 >            else:
930 >                txt += '    mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
931 >                txt += '    ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
932 >            txt += 'else\n'
933 >            txt += '    job_exit_code=60302\n'
934 >            txt += '    echo "WARNING: Output file '+fileWithSuffix+' not found"\n'
935 >            if common.scheduler.name().upper() == 'CONDOR_G':
936                  txt += '    if [ $middleware == OSG ]; then \n'
937                  txt += '        echo "prepare dummy output file"\n'
938                  txt += '        echo "Processing of job output failed" > $RUNTIME_AREA/'+output_file_num+'\n'
939                  txt += '    fi \n'
987            txt += 'else\n'
988            txt += '   cp '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
940              txt += 'fi\n'
941 <      
942 <        txt += 'cd $RUNTIME_AREA\n'
943 <        file_list=file_list[:-1]
944 <        txt += 'file_list="'+file_list+'"\n'
945 <        txt += 'cd $RUNTIME_AREA\n'
995 <        ### OLI_DANIELE
996 <        txt += 'if [ $middleware == OSG ]; then\n'  
997 <        txt += '    cd $RUNTIME_AREA\n'
998 <        txt += '    echo "Remove working directory: $WORKING_DIR"\n'
999 <        txt += '    /bin/rm -rf $WORKING_DIR\n'
1000 <        txt += '    if [ -d $WORKING_DIR ] ;then\n'
1001 <        txt += '        echo "SET_EXE 60999 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after cleanup of WN"\n'
1002 <        txt += '        echo "JOB_EXIT_STATUS = 60999"\n'
1003 <        txt += '        echo "JobExitCode=60999" | tee -a $RUNTIME_AREA/$repo\n'
1004 <        txt += '        dumpStatus $RUNTIME_AREA/$repo\n'
1005 <        txt += '        rm -f $RUNTIME_AREA/$repo \n'
1006 <        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1007 <        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1008 <        txt += '    fi\n'
1009 <        txt += 'fi\n'
941 >        file_list = []
942 >        for fileWithSuffix in (self.output_file):
943 >             file_list.append(numberFile('$SOFTWARE_DIR/'+fileWithSuffix, '$OutUniqueID'))
944 >
945 >        txt += 'file_list="'+string.join(file_list,',')+'"\n'
946          txt += '\n'
947 +        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
948 +        txt += 'echo ">>> current directory content:"\n'
949 +        if self.debug_wrapper==1:
950 +            txt += 'ls -Al\n'
951 +        txt += '\n'
952 +        txt += 'cd $RUNTIME_AREA\n'
953 +        txt += 'echo ">>> current directory (RUNTIME_AREA):  $RUNTIME_AREA"\n'
954          return txt
955  
956 <    def numberFile_(self, file, txt):
1014 <        """
1015 <        append _'txt' before last extension of a file
1016 <        """
1017 <        p = string.split(file,".")
1018 <        # take away last extension
1019 <        name = p[0]
1020 <        for x in p[1:-1]:
1021 <           name=name+"."+x
1022 <        # add "_txt"
1023 <        if len(p)>1:
1024 <          ext = p[len(p)-1]
1025 <          result = name + '_' + txt + "." + ext
1026 <        else:
1027 <          result = name + '_' + txt
1028 <        
1029 <        return result
1030 <
1031 <    def getRequirements(self):
956 >    def getRequirements(self, nj=[]):
957          """
958 <        return job requirements to add to jdl files
958 >        return job requirements to add to jdl files
959          """
960          req = ''
961          if self.version:
962              req='Member("VO-cms-' + \
963                   self.version + \
964                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
965 +        if self.executable_arch:
966 +            req+=' && Member("VO-cms-' + \
967 +                 self.executable_arch + \
968 +                 '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
969  
970          req = req + ' && (other.GlueHostNetworkAdapterOutboundIP)'
971 +        if ( common.scheduler.name() in ["glite"] ):
972 +            ## 25-Jun-2009 SL: patch to use Cream enabled WMS
973 +            if ( self.cfg_params.get('GRID.use_cream',None) ):
974 +                req += ' && (other.GlueCEStateStatus == "Production" || other.GlueCEStateStatus == "Special")'
975 +            else:
976 +                req += ' && other.GlueCEStateStatus == "Production" '
977  
978          return req
979  
980      def configFilename(self):
981          """ return the config filename """
982 <        return self.name()+'.cfg'
982 >        return self.name()+'.py'
983  
1049    ### OLI_DANIELE
984      def wsSetupCMSOSGEnvironment_(self):
985          """
986          Returns part of a job script which is prepares
987          the execution environment and which is common for all CMS jobs.
988          """
989 <        txt = '\n'
990 <        txt += '   echo "### SETUP CMS OSG  ENVIRONMENT ###"\n'
991 <        txt += '   if [ -f $GRID3_APP_DIR/cmssoft/cmsset_default.sh ] ;then\n'
992 <        txt += '      # Use $GRID3_APP_DIR/cmssoft/cmsset_default.sh to setup cms software\n'
993 <        txt += '       source $GRID3_APP_DIR/cmssoft/cmsset_default.sh '+self.version+'\n'
994 <        txt += '   elif [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
989 >        txt = '\n#Written by cms_cmssw::wsSetupCMSOSGEnvironment_\n'
990 >        txt += '    echo ">>> setup CMS OSG environment:"\n'
991 >        txt += '    echo "set SCRAM ARCH to ' + self.executable_arch + '"\n'
992 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
993 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
994 >        txt += '    if [ -f $OSG_APP/cmssoft/cms/cmsset_default.sh ] ;then\n'
995          txt += '      # Use $OSG_APP/cmssoft/cms/cmsset_default.sh to setup cms software\n'
996 <        txt += '       source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
997 <        txt += '   else\n'
998 <        txt += '       echo "SET_CMS_ENV 10020 ==> ERROR $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
999 <        txt += '       echo "JOB_EXIT_STATUS = 10020"\n'
1000 <        txt += '       echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1001 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1068 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1069 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1070 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1071 <        txt += '       exit 1\n'
1072 <        txt += '\n'
1073 <        txt += '       echo "Remove working directory: $WORKING_DIR"\n'
1074 <        txt += '       cd $RUNTIME_AREA\n'
1075 <        txt += '       /bin/rm -rf $WORKING_DIR\n'
1076 <        txt += '       if [ -d $WORKING_DIR ] ;then\n'
1077 <        txt += '            echo "SET_CMS_ENV 10017 ==> OSG $WORKING_DIR could not be deleted on WN `hostname` after $GRID3_APP_DIR/cmssoft/cmsset_default.sh and $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
1078 <        txt += '            echo "JOB_EXIT_STATUS = 10017"\n'
1079 <        txt += '            echo "JobExitCode=10017" | tee -a $RUNTIME_AREA/$repo\n'
1080 <        txt += '            dumpStatus $RUNTIME_AREA/$repo\n'
1081 <        txt += '            rm -f $RUNTIME_AREA/$repo \n'
1082 <        txt += '            echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1083 <        txt += '            echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1084 <        txt += '       fi\n'
1085 <        txt += '\n'
1086 <        txt += '       exit 1\n'
1087 <        txt += '   fi\n'
996 >        txt += '        source $OSG_APP/cmssoft/cms/cmsset_default.sh '+self.version+'\n'
997 >        txt += '    else\n'
998 >        txt += '        echo "ERROR ==> $OSG_APP/cmssoft/cms/cmsset_default.sh file not found"\n'
999 >        txt += '        job_exit_code=10020\n'
1000 >        txt += '        func_exit\n'
1001 >        txt += '    fi\n'
1002          txt += '\n'
1003 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1004 <        txt += '   echo " END SETUP CMS OSG  ENVIRONMENT "\n'
1003 >        txt += '    echo "==> setup cms environment ok"\n'
1004 >        txt += '    echo "SCRAM_ARCH = $SCRAM_ARCH"\n'
1005  
1006          return txt
1007 <
1094 <    ### OLI_DANIELE
1007 >
1008      def wsSetupCMSLCGEnvironment_(self):
1009          """
1010          Returns part of a job script which is prepares
1011          the execution environment and which is common for all CMS jobs.
1012          """
1013 <        txt  = '   \n'
1014 <        txt += '   echo " ### SETUP CMS LCG  ENVIRONMENT ### "\n'
1015 <        txt += '   if [ ! $VO_CMS_SW_DIR ] ;then\n'
1016 <        txt += '       echo "SET_CMS_ENV 10031 ==> ERROR CMS software dir not found on WN `hostname`"\n'
1017 <        txt += '       echo "JOB_EXIT_STATUS = 10031" \n'
1018 <        txt += '       echo "JobExitCode=10031" | tee -a $RUNTIME_AREA/$repo\n'
1019 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1020 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1021 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1022 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1023 <        txt += '       exit 1\n'
1024 <        txt += '   else\n'
1025 <        txt += '       echo "Sourcing environment... "\n'
1026 <        txt += '       if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1027 <        txt += '           echo "SET_CMS_ENV 10020 ==> ERROR cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1028 <        txt += '           echo "JOB_EXIT_STATUS = 10020"\n'
1029 <        txt += '           echo "JobExitCode=10020" | tee -a $RUNTIME_AREA/$repo\n'
1030 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1031 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1032 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1033 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1034 <        txt += '           exit 1\n'
1035 <        txt += '       fi\n'
1036 <        txt += '       echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1037 <        txt += '       source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1038 <        txt += '       result=$?\n'
1039 <        txt += '       if [ $result -ne 0 ]; then\n'
1040 <        txt += '           echo "SET_CMS_ENV 10032 ==> ERROR problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1041 <        txt += '           echo "JOB_EXIT_STATUS = 10032"\n'
1042 <        txt += '           echo "JobExitCode=10032" | tee -a $RUNTIME_AREA/$repo\n'
1043 <        txt += '           dumpStatus $RUNTIME_AREA/$repo\n'
1044 <        txt += '           rm -f $RUNTIME_AREA/$repo \n'
1045 <        txt += '           echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1046 <        txt += '           echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1047 <        txt += '           exit 1\n'
1048 <        txt += '       fi\n'
1049 <        txt += '   fi\n'
1050 <        txt += '   \n'
1051 <        txt += '   string=`cat /etc/redhat-release`\n'
1052 <        txt += '   echo $string\n'
1053 <        txt += '   if [[ $string = *alhalla* ]]; then\n'
1054 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1055 <        txt += '   elif [[ $string = *Enterprise* ]] || [[ $string = *cientific* ]]; then\n'
1056 <        txt += '       export SCRAM_ARCH=slc3_ia32_gcc323\n'
1057 <        txt += '       echo "SCRAM_ARCH= $SCRAM_ARCH"\n'
1058 <        txt += '   else\n'
1059 <        txt += '       echo "SET_CMS_ENV 10033 ==> ERROR OS unknown, LCG environment not initialized"\n'
1060 <        txt += '       echo "JOB_EXIT_STATUS = 10033"\n'
1061 <        txt += '       echo "JobExitCode=10033" | tee -a $RUNTIME_AREA/$repo\n'
1062 <        txt += '       dumpStatus $RUNTIME_AREA/$repo\n'
1063 <        txt += '       rm -f $RUNTIME_AREA/$repo \n'
1064 <        txt += '       echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
1065 <        txt += '       echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
1066 <        txt += '       exit 1\n'
1067 <        txt += '   fi\n'
1068 <        txt += '   echo "SET_CMS_ENV 0 ==> setup cms environment ok"\n'
1069 <        txt += '   echo "### END SETUP CMS LCG ENVIRONMENT ###"\n'
1013 >        txt = '\n#Written by cms_cmssw::wsSetupCMSLCGEnvironment_\n'
1014 >        txt += '    echo ">>> setup CMS LCG environment:"\n'
1015 >        txt += '    echo "set SCRAM ARCH and BUILD_ARCH to ' + self.executable_arch + ' ###"\n'
1016 >        txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
1017 >        txt += '    export BUILD_ARCH='+self.executable_arch+'\n'
1018 >        txt += '    if [ ! $VO_CMS_SW_DIR ] ;then\n'
1019 >        txt += '        echo "ERROR ==> CMS software dir not found on WN `hostname`"\n'
1020 >        txt += '        job_exit_code=10031\n'
1021 >        txt += '        func_exit\n'
1022 >        txt += '    else\n'
1023 >        txt += '        echo "Sourcing environment... "\n'
1024 >        txt += '        if [ ! -s $VO_CMS_SW_DIR/cmsset_default.sh ] ;then\n'
1025 >        txt += '            echo "ERROR ==> cmsset_default.sh file not found into dir $VO_CMS_SW_DIR"\n'
1026 >        txt += '            job_exit_code=10020\n'
1027 >        txt += '            func_exit\n'
1028 >        txt += '        fi\n'
1029 >        txt += '        echo "sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1030 >        txt += '        source $VO_CMS_SW_DIR/cmsset_default.sh\n'
1031 >        txt += '        result=$?\n'
1032 >        txt += '        if [ $result -ne 0 ]; then\n'
1033 >        txt += '            echo "ERROR ==> problem sourcing $VO_CMS_SW_DIR/cmsset_default.sh"\n'
1034 >        txt += '            job_exit_code=10032\n'
1035 >        txt += '            func_exit\n'
1036 >        txt += '        fi\n'
1037 >        txt += '    fi\n'
1038 >        txt += '    \n'
1039 >        txt += '    echo "==> setup cms environment ok"\n'
1040 >        return txt
1041 >
1042 >    def wsModifyReport(self, nj):
1043 >        """
1044 >        insert the part of the script that modifies the FrameworkJob Report
1045 >        """
1046 >
1047 >        txt = ''
1048 >        if (self.copy_data == 1):
1049 >            txt = '\n#Written by cms_cmssw::wsModifyReport\n'
1050 >
1051 >            txt += 'echo ">>> Modify Job Report:" \n'
1052 >            txt += 'chmod a+x $RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1053 >            txt += 'echo "CMSSW_VERSION = $CMSSW_VERSION"\n\n'
1054 >
1055 >            args = 'fjr $RUNTIME_AREA/crab_fjr_$NJob.xml json $RUNTIME_AREA/resultCopyFile n_job $OutUniqueID PrimaryDataset $PrimaryDataset  ApplicationFamily $ApplicationFamily ApplicationName $executable cmssw_version $CMSSW_VERSION psethash $PSETHASH'
1056 >
1057 >            if (self.publish_data == 1):
1058 >                txt += 'ProcessedDataset='+self.processedDataset+'\n'
1059 >                txt += 'echo "ProcessedDataset = $ProcessedDataset"\n'
1060 >                args += ' UserProcessedDataset $USER-$ProcessedDataset-$PSETHASH'
1061 >
1062 >            txt += 'echo "$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'"\n'
1063 >            txt += '$RUNTIME_AREA/ProdCommon/FwkJobRep/ModifyJobReport.py '+str(args)+'\n'
1064 >            txt += 'modifyReport_result=$?\n'
1065 >            txt += 'if [ $modifyReport_result -ne 0 ]; then\n'
1066 >            txt += '    modifyReport_result=70500\n'
1067 >            txt += '    job_exit_code=$modifyReport_result\n'
1068 >            txt += '    echo "ModifyReportResult=$modifyReport_result" | tee -a $RUNTIME_AREA/$repo\n'
1069 >            txt += '    echo "WARNING: Problem with ModifyJobReport"\n'
1070 >            txt += 'else\n'
1071 >            txt += '    mv NewFrameworkJobReport.xml $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1072 >            txt += 'fi\n'
1073 >        return txt
1074 >
1075 >    def wsParseFJR(self):
1076 >        """
1077 >        Parse the FrameworkJobReport to obtain useful infos
1078 >        """
1079 >        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1080 >        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1081 >        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1082 >        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1083 >        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1084 >        if self.debug_wrapper==1 :
1085 >            txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1086 >        txt += '        executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1087 >        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1088 >        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1089 >        txt += '            cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1090 >        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1091 >        txt += '        elif [ $executable_exit_status -eq -999 ];then\n'
1092 >        txt += '            echo "ExitStatus from FrameworkJobReport not available. not available. Using exit code of executable from command line."\n'
1093 >        txt += '        else\n'
1094 >        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1095 >        txt += '        fi\n'
1096 >        txt += '    else\n'
1097 >        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1098 >        txt += '    fi\n'
1099 >          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1100 >        txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1101 >        txt += '        echo ">>> Executable succeded  $executable_exit_status"\n'
1102 >        txt += '    fi\n'
1103 >        txt += 'else\n'
1104 >        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1105 >        txt += 'fi\n'
1106 >        txt += '\n'
1107 >        txt += 'if [ $executable_exit_status -ne 0 ];then\n'
1108 >        txt += '    echo ">>> Executable failed  $executable_exit_status"\n'
1109 >        txt += '    echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1110 >        txt += '    echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1111 >        txt += '    job_exit_code=$executable_exit_status\n'
1112 >        txt += '    func_exit\n'
1113 >        txt += 'fi\n\n'
1114 >        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1115 >        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1116 >        txt += 'job_exit_code=$executable_exit_status\n'
1117 >
1118          return txt
1119  
1120      def setParam_(self, param, value):
# Line 1162 | Line 1123 | class Cmssw(JobType):
    def getParams(self):
        """
        Return the parameter dictionary held in self._params.

        The dictionary itself is returned, not a copy.
        """
        return self._params
1125  
1126 <    def setTaskid_(self):
1127 <        self._taskId = self.cfg_params['taskId']
1128 <        
1129 <    def getTaskid(self):
1130 <        return self._taskId
1131 <
1132 < #######################################################################
1133 <    def uniquelist(self, old):
1134 <        """
1135 <        remove duplicates from a list
1136 <        """
1137 <        nd={}
1138 <        for e in old:
1139 <            nd[e]=0
1140 <        return nd.keys()
1126 >    def outList(self,list=False):
1127 >        """
1128 >        check the dimension of the output files
1129 >        """
1130 >        txt = ''
1131 >        txt += 'echo ">>> list of expected files on output sandbox"\n'
1132 >        listOutFiles = []
1133 >        stdout = 'CMSSW_$NJob.stdout'
1134 >        stderr = 'CMSSW_$NJob.stderr'
1135 >        if len(self.output_file) <= 0:
1136 >            msg ="WARNING: no output files name have been defined!!\n"
1137 >            msg+="\tno output files will be reported back/staged\n"
1138 >            common.logger.info(msg)
1139 >
1140 >        if (self.return_data == 1):
1141 >            for file in (self.output_file):
1142 >                listOutFiles.append(numberFile(file, '$OutUniqueID'))
1143 >        for file in (self.output_file_sandbox):
1144 >            listOutFiles.append(numberFile(file, '$NJob'))
1145 >        listOutFiles.append(stdout)
1146 >        listOutFiles.append(stderr)
1147 >
1148 >        txt += 'echo "output files: '+string.join(listOutFiles,' ')+'"\n'
1149 >        txt += 'filesToCheck="'+string.join(listOutFiles,' ')+'"\n'
1150 >        txt += 'export filesToCheck\n'
1151 >        taskinfo={}
1152 >        taskinfo['outfileBasename'] = self.output_file
1153 >        common._db.updateTask_(taskinfo)
1154 >
1155 >        if list : return self.output_file
1156 >        return txt
1157 >
1158 >    def checkCMSSWVersion(self, url = "https://cmstags.cern.ch/cgi-bin/CmsTC/", fileName = "ReleasesXML"):
1159 >        """
1160 >        compare current CMSSW release and arch with allowed releases
1161 >        """
1162 >
1163 >        downloader = Downloader(url)
1164 >        goodRelease = False
1165 >
1166 >        try:
1167 >            result = downloader.config(fileName)
1168 >        except:
1169 >            common.logger.info("ERROR: Problem reading file of allowed CMSSW releases.")
1170 >
1171 >        try:
1172 >            events = pulldom.parseString(result)
1173 >
1174 >            arch     = None
1175 >            release  = None
1176 >            relType  = None
1177 >            relState = None
1178 >            for (event, node) in events:
1179 >                if event == pulldom.START_ELEMENT:
1180 >                    if node.tagName == 'architecture':
1181 >                        arch = node.attributes.getNamedItem('name').nodeValue
1182 >                    if node.tagName == 'project':
1183 >                        relType = node.attributes.getNamedItem('type').nodeValue
1184 >                        relState = node.attributes.getNamedItem('state').nodeValue
1185 >                        if relType == 'Production' and relState == 'Announced':
1186 >                            release = node.attributes.getNamedItem('label').nodeValue
1187 >                if self.executable_arch == arch and self.version == release:
1188 >                    goodRelease = True
1189 >                    return goodRelease
1190 >
1191 >            if not goodRelease:
1192 >                msg = "WARNING: %s on %s is not a supported release. " % \
1193 >                        (self.version, self.executable_arch)
1194 >                msg += "Submission may fail."
1195 >                common.logger.info(msg)
1196 >        except:
1197 >            common.logger.info("Problems parsing file of allowed CMSSW releases.")
1198 >
1199 >        return goodRelease
1200 >

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines