ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/SchedulerRemoteglidein.py
(Generate patch)

Comparing COMP/CRAB/python/SchedulerRemoteglidein.py (file contents):
Revision 1.8 by belforte, Thu Nov 8 15:30:21 2012 UTC vs.
Revision 1.19 by belforte, Fri Aug 9 08:47:30 2013 UTC

# Line 5 | Line 5 | Implements the Remote Glidein scheduler
5   from SchedulerGrid  import SchedulerGrid
6   from crab_exceptions import CrabException
7   from crab_util import runCommand
8 + from crab_util import gethnUserNameFromSiteDB
9   from ServerConfig import *
9 from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
10   import Scram
11  
12   import common
# Line 40 | Line 40 | class SchedulerRemoteglidein(SchedulerGr
40  
41          self.environment_unique_identifier = None
42          self.submissionDay = time.strftime("%y%m%d",time.localtime())
43 <
43 >        
44          return
45  
46  
# Line 48 | Line 48 | class SchedulerRemoteglidein(SchedulerGr
48          """
49          Configure the scheduler with the config settings from the user
50          """
51 <        
51 >  
52 >        # this line needs to be before the call to SchedulerGrid.configure
53 >        # because that calls SchedulerRemoteglidein in turn and
54 >        # sshControlPersist needs to be defined then :-(
55 >        self.sshControlPersist =  cfg_params.get('USER.ssh_control_persist','3600')
56 >        if self.sshControlPersist.lower() == "no" or \
57 >                self.sshControlPersist.lower() == "yes" or \
58 >                self.sshControlPersist.isdigit() :
59 >            pass
60 >        else:
61 >            msg = "Error: invalid value '%s' for USER.ssh_control_persist " % \
62 >                self.sshControlPersist
63 >            raise CrabException(msg)
64 >
65          SchedulerGrid.configure(self, cfg_params)
66  
67          self.proxyValid=0
# Line 59 | Line 72 | class SchedulerRemoteglidein(SchedulerGr
72          self.role = cfg_params.get("GRID.role", None)
73          self.VO = cfg_params.get('GRID.virtual_organization','cms')
74          self.allowOverflow = cfg_params.get('GRID.allow_overflow', '1')
75 <        self.max_rss = cfg_params.get('GRID.max_rss','2300')
75 >        self.max_rss = cfg_params.get('GRID.max_rss','2000')
76  
77          self.checkProxy()
78  
# Line 108 | Line 121 | class SchedulerRemoteglidein(SchedulerGr
121              raise CrabException(msg)
122  
123          return
111    
112    def userName(self):
113        """ return the user name """
114        tmp=runCommand("voms-proxy-info -identity 2>/dev/null")
115        return tmp.strip()
124  
125 + #
126      def envUniqueID(self):
127          taskHash = sha1(common._db.queryTask('name')).hexdigest()
128          id = "https://" + socket.gethostname() + '/' + taskHash + "/${NJob}"
# Line 147 | Line 156 | class SchedulerRemoteglidein(SchedulerGr
156          cmsver=re.split('_', cmsVersion)
157          numericCmsVersion = "%s%.2d%.2d" %(cmsver[1], int(cmsver[2]), int(cmsver[3]))
158  
159 +        jobParams += '+DESIRED_CMSDataset ="' +self.datasetPath+'";'
160          jobParams += '+DESIRED_CMSVersion ="' +cmsVersion+'";'
161          jobParams += '+DESIRED_CMSVersionNr ="' +numericCmsVersion+'";'
162          jobParams += '+DESIRED_CMSScramArch ="' +scramArch+'";'
163 +
164 +        userName = gethnUserNameFromSiteDB()
165 +        jobParams += '+AccountingGroup ="' + userName+'";'
166          
167          myscheddName = self.remoteHost
168 +
169          jobParams += '+Glidein_MonitorID = "https://'+ myscheddName + \
170                       '//' + self.submissionDay + '//$(Cluster).$(Process)"; '
171  
172          if (self.EDG_clock_time):
173 <            jobParams += '+MaxWallTimeMins = '+self.EDG_clock_time+'; '
173 >            glideinTime = "%d" % (int(self.EDG_clock_time)+20) # 20 min to wrapup
174 >            jobParams += '+MaxWallTimeMins = '+ glideinTime + '; '
175          else:
176 <            jobParams += '+MaxWallTimeMins = %d; ' % (60*24)
176 >            jobParams += '+MaxWallTimeMins = %d; ' % (21*60+55) #  21:55h  (unit = min)
177 >
178  
179          if self.max_rss :
180              jobParams += 'request_memory = '+self.max_rss+';'
# Line 174 | Line 190 | class SchedulerRemoteglidein(SchedulerGr
190  
191          common._db.updateTask_({'jobType':jobParams})
192  
177
193          return jobParams
194  
195  
# Line 183 | Line 198 | class SchedulerRemoteglidein(SchedulerGr
198          Return dictionary with specific parameters, to use with real scheduler
199          is called when scheduler is initialized in Boss, i.e. at each crab command
200          """
201 <        #SB this method is used to pass directory names to Boss Scheduler
201 >        #SB this method is used to pass information to Boss Scheduler
202          # via params dictionary
203  
204          jobDir = common.work_space.jobDir()
# Line 193 | Line 208 | class SchedulerRemoteglidein(SchedulerGr
208          params = {'shareDir':shareDir,
209                    'jobDir':jobDir,
210                    'taskDir':taskDir,
211 <                  'submissionDay':self.submissionDay}
211 >                  'submissionDay':self.submissionDay,
212 >                  'sshControlPersist':self.sshControlPersist}
213  
214          return params
215  
# Line 238 | Line 254 | class SchedulerRemoteglidein(SchedulerGr
254  
255          txt += 'func_exit() { \n'
256          txt += self.wsExitFunc_common()
241
242        txt += '    tar zcvf ${out_files}.tgz  ${final_list}\n'
243        txt += '    tmp_size=`ls -gGrta ${out_files}.tgz | awk \'{ print $3 }\'`\n'
244        txt += '    rm ${out_files}.tgz\n'
245        txt += '    size=`expr $tmp_size`\n'
246        txt += '    echo "Total Output dimension: $size"\n'
247        txt += '    limit='+str(self.OSBsize) +' \n'
248        txt += '    echo "WARNING: output files size limit is set to: $limit"\n'
249        txt += '    if [ "$limit" -lt "$size" ]; then\n'
250        txt += '        exceed=1\n'
251        txt += '        job_exit_code=70000\n'
252        txt += '        echo "Output Sanbox too big. Produced output is lost "\n'
253        txt += '    else\n'
254        txt += '        exceed=0\n'
255        txt += '        echo "Total Output dimension $size is fine."\n'
256        txt += '    fi\n'
257
258        txt += '    echo "JOB_EXIT_STATUS = $job_exit_code"\n'
259        txt += '    echo "JobExitCode=$job_exit_code" >> $RUNTIME_AREA/$repo\n'
260        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
261        txt += '    if [ $exceed -ne 1 ]; then\n'
262        txt += '        tar zcvf ${out_files}.tgz  ${final_list}\n'
263        txt += '    else\n'
264        txt += '        tar zcvf ${out_files}.tgz CMSSW_${NJob}.stdout CMSSW_${NJob}.stderr\n'
265        txt += '    fi\n'
266        txt += '    python $RUNTIME_AREA/fillCrabFjr.py $RUNTIME_AREA/crab_fjr_$NJob.xml --errorcode $job_exit_code \n'
267
257          txt += '    exit $job_exit_code\n'
258          txt += '}\n'
259  
# Line 292 | Line 281 | class SchedulerRemoteglidein(SchedulerGr
281              remoteUserHost=str(task['serverName'])
282              common.logger.info("serverName from Task DB is %s" %
283                                 remoteUserHost)
295            if '@' in remoteUserHost:
296                remoteHost = remoteUserHost.split('@')[1]
297            else:
298                remoteHost = remoteUserHost
284          else:
285              if self.cfg_params.has_key('CRAB.submit_host'):
286                  # get a remote submission host from crab config file
287                  srvCfg=ServerConfig(self.cfg_params['CRAB.submit_host']).config()
288 <                remoteHost=srvCfg['serverName']
289 <                common.logger.info("remotehost from crab.cfg = %s" % remoteHost)
288 >                remoteUserHost=srvCfg['serverName']
289 >                common.logger.info("remotehost from crab.cfg = %s" % remoteUserHost)
290              else:
291                  # pick from Available Servers List
292                  srvCfg=ServerConfig('default').config()
293 <                remoteHost = srvCfg['serverName']
294 <                common.logger.info("remotehost from Avail.List = %s" % remoteHost)
293 >                remoteUserHost = srvCfg['serverName']
294 >                common.logger.info("remotehost from Avail.List = %s" % remoteUserHost)
295  
296 <            if not remoteHost:
296 >            if not remoteUserHost:
297                  raise CrabException('FATAL ERROR: remoteHost not defined')
313            
314            #common.logger.info("try to find out username for remote Host via uberftp ...")
315            #command="uberftp %s pwd|grep User|awk '{print $3}'" % remoteHost
316            #(status, output) = commands.getstatusoutput(command)
317            #if status == 0:
318            #    remoteUser = output
319            #    common.logger.info("remoteUser set to %s" % remoteUser)
320            #    if remoteUser==None:
321            #        raise CrabException('FATAL ERROR: REMOTE USER not defined')
298  
299 <            #remoteUserHost = remoteUser + '@' + remoteHost
300 <            remoteUserHost = remoteHost
299 >        if '@' in remoteUserHost:
300 >            remoteHost = remoteUserHost.split('@')[1]
301 >        else:
302 >            remoteHost = remoteUserHost
303  
304          common._db.updateTask_({'serverName':remoteUserHost})
305  

Diff Legend

(no marker, old line number only) Removed lines
+ Added lines
< Changed lines (old revision, 1.8)
> Changed lines (new revision, 1.19)