ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/SchedulerRemoteglidein.py
(Generate patch)

Comparing COMP/CRAB/python/SchedulerRemoteglidein.py (file contents):
Revision 1.8 by belforte, Thu Nov 8 15:30:21 2012 UTC vs.
Revision 1.22 by belforte, Tue Sep 3 13:57:24 2013 UTC

# Line 5 | Line 5 | Implements the Remote Glidein scheduler
5   from SchedulerGrid  import SchedulerGrid
6   from crab_exceptions import CrabException
7   from crab_util import runCommand
8 + from crab_util import gethnUserNameFromSiteDB
9   from ServerConfig import *
9 from WMCore.SiteScreening.BlackWhiteListParser import SEBlackWhiteListParser
10   import Scram
11  
12   import common
# Line 40 | Line 40 | class SchedulerRemoteglidein(SchedulerGr
40  
41          self.environment_unique_identifier = None
42          self.submissionDay = time.strftime("%y%m%d",time.localtime())
43 <
43 >        
44          return
45  
46  
# Line 48 | Line 48 | class SchedulerRemoteglidein(SchedulerGr
48          """
49          Configure the scheduler with the config settings from the user
50          """
51 <        
51 >  
52 >        # this line needs to be before the call to SchedulerGrid.configure
53 >        # because that calls SchedulerRemoteglidein in turn and
54 >        # sshControlPersist needs to be defined then :-(
55 >        self.sshControlPersist =  cfg_params.get('USER.ssh_control_persist','3600')
56 >        if self.sshControlPersist.lower() == "no" or \
57 >                self.sshControlPersist.lower() == "yes" or \
58 >                self.sshControlPersist.isdigit() :
59 >            pass
60 >        else:
61 >            msg = "Error: invalid value '%s' for USER.ssh_control_persist " % \
62 >                self.sshControlPersist
63 >            raise CrabException(msg)
64 >
65          SchedulerGrid.configure(self, cfg_params)
66  
67          self.proxyValid=0
# Line 59 | Line 72 | class SchedulerRemoteglidein(SchedulerGr
72          self.role = cfg_params.get("GRID.role", None)
73          self.VO = cfg_params.get('GRID.virtual_organization','cms')
74          self.allowOverflow = cfg_params.get('GRID.allow_overflow', '1')
75 <        self.max_rss = cfg_params.get('GRID.max_rss','2300')
75 >        self.max_rss = cfg_params.get('GRID.max_rss','2000')
76  
77          self.checkProxy()
78  
# Line 108 | Line 121 | class SchedulerRemoteglidein(SchedulerGr
121              raise CrabException(msg)
122  
123          return
111    
112    def userName(self):
113        """ return the user name """
114        tmp=runCommand("voms-proxy-info -identity 2>/dev/null")
115        return tmp.strip()
124  
125 + #
126      def envUniqueID(self):
127          taskHash = sha1(common._db.queryTask('name')).hexdigest()
128          id = "https://" + socket.gethostname() + '/' + taskHash + "/${NJob}"
# Line 137 | Line 146 | class SchedulerRemoteglidein(SchedulerGr
146              seDest = self.blackWhiteListParser.expandList("T") # all of SiteDB
147  
148          seString=self.blackWhiteListParser.cleanForBlackWhiteList(seDest)
149 <
149 >        # beware SiteDB V2 API, explicitly cast to string in case it is unicode
150 >        seString=str(seString)
151 >        
152          jobParams += '+DESIRED_SEs = "'+seString+'"; '
153  
154          scram = Scram.Scram(None)
# Line 147 | Line 158 | class SchedulerRemoteglidein(SchedulerGr
158          cmsver=re.split('_', cmsVersion)
159          numericCmsVersion = "%s%.2d%.2d" %(cmsver[1], int(cmsver[2]), int(cmsver[3]))
160  
161 +        # protect against datasetPath being None
162 +        jobParams += '+DESIRED_CMSDataset ="' + str(self.datasetPath) + '";'
163 +            
164          jobParams += '+DESIRED_CMSVersion ="' +cmsVersion+'";'
165          jobParams += '+DESIRED_CMSVersionNr ="' +numericCmsVersion+'";'
166          jobParams += '+DESIRED_CMSScramArch ="' +scramArch+'";'
167 +
168 +        userName = gethnUserNameFromSiteDB()
169 +        jobParams += '+AccountingGroup ="' + userName+'";'
170          
171          myscheddName = self.remoteHost
172 +
173          jobParams += '+Glidein_MonitorID = "https://'+ myscheddName + \
174                       '//' + self.submissionDay + '//$(Cluster).$(Process)"; '
175  
176          if (self.EDG_clock_time):
177 <            jobParams += '+MaxWallTimeMins = '+self.EDG_clock_time+'; '
177 >            glideinTime = "%d" % (int(self.EDG_clock_time)+20) # 20 min to wrapup
178 >            jobParams += '+MaxWallTimeMins = '+ glideinTime + '; '
179          else:
180 <            jobParams += '+MaxWallTimeMins = %d; ' % (60*24)
180 >            jobParams += '+MaxWallTimeMins = %d; ' % (21*60+55) #  21:55h  (unit = min)
181 >
182  
183          if self.max_rss :
184              jobParams += 'request_memory = '+self.max_rss+';'
# Line 174 | Line 194 | class SchedulerRemoteglidein(SchedulerGr
194  
195          common._db.updateTask_({'jobType':jobParams})
196  
177
197          return jobParams
198  
199  
# Line 183 | Line 202 | class SchedulerRemoteglidein(SchedulerGr
202          Return dictionary with specific parameters, to use with real scheduler
203          is called when scheduler is initialized in Boss, i.e. at each crab command
204          """
205 <        #SB this method is used to pass directory names to Boss Scheduler
205 >        #SB this method is used to pass information to Boss Scheduler
206          # via params dictionary
207  
208          jobDir = common.work_space.jobDir()
# Line 193 | Line 212 | class SchedulerRemoteglidein(SchedulerGr
212          params = {'shareDir':shareDir,
213                    'jobDir':jobDir,
214                    'taskDir':taskDir,
215 <                  'submissionDay':self.submissionDay}
215 >                  'submissionDay':self.submissionDay,
216 >                  'sshControlPersist':self.sshControlPersist}
217  
218          return params
219  
# Line 238 | Line 258 | class SchedulerRemoteglidein(SchedulerGr
258  
259          txt += 'func_exit() { \n'
260          txt += self.wsExitFunc_common()
241
242        txt += '    tar zcvf ${out_files}.tgz  ${final_list}\n'
243        txt += '    tmp_size=`ls -gGrta ${out_files}.tgz | awk \'{ print $3 }\'`\n'
244        txt += '    rm ${out_files}.tgz\n'
245        txt += '    size=`expr $tmp_size`\n'
246        txt += '    echo "Total Output dimension: $size"\n'
247        txt += '    limit='+str(self.OSBsize) +' \n'
248        txt += '    echo "WARNING: output files size limit is set to: $limit"\n'
249        txt += '    if [ "$limit" -lt "$size" ]; then\n'
250        txt += '        exceed=1\n'
251        txt += '        job_exit_code=70000\n'
252        txt += '        echo "Output Sanbox too big. Produced output is lost "\n'
253        txt += '    else\n'
254        txt += '        exceed=0\n'
255        txt += '        echo "Total Output dimension $size is fine."\n'
256        txt += '    fi\n'
257
258        txt += '    echo "JOB_EXIT_STATUS = $job_exit_code"\n'
259        txt += '    echo "JobExitCode=$job_exit_code" >> $RUNTIME_AREA/$repo\n'
260        txt += '    dumpStatus $RUNTIME_AREA/$repo\n'
261        txt += '    if [ $exceed -ne 1 ]; then\n'
262        txt += '        tar zcvf ${out_files}.tgz  ${final_list}\n'
263        txt += '    else\n'
264        txt += '        tar zcvf ${out_files}.tgz CMSSW_${NJob}.stdout CMSSW_${NJob}.stderr\n'
265        txt += '    fi\n'
266        txt += '    python $RUNTIME_AREA/fillCrabFjr.py $RUNTIME_AREA/crab_fjr_$NJob.xml --errorcode $job_exit_code \n'
267
261          txt += '    exit $job_exit_code\n'
262          txt += '}\n'
263  
# Line 292 | Line 285 | class SchedulerRemoteglidein(SchedulerGr
285              remoteUserHost=str(task['serverName'])
286              common.logger.info("serverName from Task DB is %s" %
287                                 remoteUserHost)
295            if '@' in remoteUserHost:
296                remoteHost = remoteUserHost.split('@')[1]
297            else:
298                remoteHost = remoteUserHost
288          else:
289              if self.cfg_params.has_key('CRAB.submit_host'):
290                  # get a remote submission host from crab config file
291                  srvCfg=ServerConfig(self.cfg_params['CRAB.submit_host']).config()
292 <                remoteHost=srvCfg['serverName']
293 <                common.logger.info("remotehost from crab.cfg = %s" % remoteHost)
292 >                remoteUserHost=srvCfg['serverName']
293 >                common.logger.info("remotehost from crab.cfg = %s" % remoteUserHost)
294              else:
295                  # pick from Available Servers List
296                  srvCfg=ServerConfig('default').config()
297 <                remoteHost = srvCfg['serverName']
298 <                common.logger.info("remotehost from Avail.List = %s" % remoteHost)
297 >                remoteUserHost = srvCfg['serverName']
298 >                common.logger.info("remotehost from Avail.List = %s" % remoteUserHost)
299  
300 <            if not remoteHost:
300 >            if not remoteUserHost:
301                  raise CrabException('FATAL ERROR: remoteHost not defined')
313            
314            #common.logger.info("try to find out username for remote Host via uberftp ...")
315            #command="uberftp %s pwd|grep User|awk '{print $3}'" % remoteHost
316            #(status, output) = commands.getstatusoutput(command)
317            #if status == 0:
318            #    remoteUser = output
319            #    common.logger.info("remoteUser set to %s" % remoteUser)
320            #    if remoteUser==None:
321            #        raise CrabException('FATAL ERROR: REMOTE USER not defined')
302  
303 <            #remoteUserHost = remoteUser + '@' + remoteHost
304 <            remoteUserHost = remoteHost
303 >        if '@' in remoteUserHost:
304 >            remoteHost = remoteUserHost.split('@')[1]
305 >        else:
306 >            remoteHost = remoteUserHost
307  
308          common._db.updateTask_({'serverName':remoteUserHost})
309  

Diff Legend

Removed lines
+ Added lines
< Changed lines
> Changed lines