ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/SchedulerEdg.py
(Generate patch)

Comparing COMP/CRAB/python/SchedulerEdg.py (file contents):
Revision 1.55 by corvo, Wed May 3 08:44:30 2006 UTC vs.
Revision 1.69 by fanzago, Tue Jul 4 12:58:15 2006 UTC

# Line 152 | Line 152 | class SchedulerEdg(Scheduler):
152          sys.path.append(libPath)
153  
154          self.proxyValid=0
155 +
156 +        try:
157 +            self._taskId = cfg_params['taskId']
158 +        except:
159 +            self._taskId = ''
160 +
161          return
162      
163  
# Line 175 | Line 181 | class SchedulerEdg(Scheduler):
181          Returns part of a job script which does scheduler-specific work.
182          """
183          txt = ''
184 +        txt += "# job number (first parameter for job wrapper)\n"
185 +        txt += "NJob=$1\n"
186 +
187 +        txt += '# job identification to DashBoard \n'
188 +        txt += 'MonitorJobID=`echo ${NJob}_$EDG_WL_JOBID`\n'
189 +        txt += 'SyncGridJobId=`echo $EDG_WL_JOBID`\n'
190 +        txt += 'MonitorID=`echo ' + self._taskId + '`\n'
191 +        txt += 'echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
192 +        txt += 'echo "SyncGridJobId=`echo $SyncGridJobId`" | tee -a $RUNTIME_AREA/$repo \n'
193 +        txt += 'echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
194 +
195          txt += 'echo "middleware discovery " \n'
196 <        txt += 'if [ $VO_CMS_SW_DIR ]; then\n'
196 >        txt += 'if [ $VO_CMS_SW_DIR ]; then \n'
197          txt += '    middleware=LCG \n'
198 +        txt += '    echo "SyncCE=`edg-brokerinfo getCE`" | tee -a $RUNTIME_AREA/$repo \n'
199 +        txt += '    echo "GridFlavour=`echo $middleware`" | tee -a $RUNTIME_AREA/$repo \n'
200          txt += '    echo "middleware =$middleware" \n'
201          txt += 'elif [ $GRID3_APP_DIR ]; then\n'
202          txt += '    middleware=OSG \n'
203 +        txt += '    echo "SyncCE=`echo $EDG_WL_LOG_DESTINATION`" | tee -a $RUNTIME_AREA/$repo \n'
204 +        txt += '    echo "GridFlavour=`echo $middleware`" | tee -a $RUNTIME_AREA/$repo \n'
205          txt += '    echo "middleware =$middleware" \n'
206          txt += 'elif [ $OSG_APP ]; then \n'
207          txt += '    middleware=OSG \n'
208 +        txt += '    echo "SyncCE=`echo $EDG_WL_LOG_DESTINATION`" | tee -a $RUNTIME_AREA/$repo \n'
209 +        txt += '    echo "GridFlavour=`echo $middleware`" | tee -a $RUNTIME_AREA/$repo \n'
210          txt += '    echo "middleware =$middleware" \n'
211          txt += 'else \n'
212 <        txt += '    echo "SET_CMS_ENV 1 ==> middleware not identified" \n'
213 <        txt += '    echo "JOB_EXIT_STATUS = 1"\n'
214 <        txt += '    exit 1\n'
215 <        txt += 'fi\n'
216 <
212 >        txt += '    echo "SET_CMS_ENV 10030 ==> middleware not identified" \n'
213 >        txt += '    echo "JOB_EXIT_STATUS = 10030" \n'
214 >        txt += '    echo "JobExitCode=10030" | tee -a $RUNTIME_AREA/$repo \n'
215 >        txt += '    dumpStatus $RUNTIME_AREA/$repo \n'
216 >        txt += '    rm -f $RUNTIME_AREA/$repo \n'
217 >        txt += '    echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
218 >        txt += '    echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
219 >        txt += '    exit 1 \n'
220 >        txt += 'fi \n'
221 >
222 >        txt += '# report first time to DashBoard \n'
223 >        txt += 'dumpStatus $RUNTIME_AREA/$repo \n'
224 >        txt += 'rm -f $RUNTIME_AREA/$repo \n'
225 >        txt += 'echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
226 >        txt += 'echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
227 >        
228          txt += '\n\n'
229  
196        txt += 'if [ $middleware == LCG ]; then \n'
197        txt += '    echo "SyncGridJobId=`echo $EDG_WL_JOBID`" | tee -a $RUNTIME_AREA/$repo\n'
198        txt += 'fi\n'
199
230          if int(self.copy_data) == 1:
231             if self.SE:
232                txt += 'export SE='+self.SE+'\n'
# Line 256 | Line 286 | class SchedulerEdg(Scheduler):
286          txt += '    if [ $OSG_JOB_CONTACT ]; then \n'
287          txt += '        CE=`echo $OSG_JOB_CONTACT | /usr/bin/awk -F\/ \'{print $1}\'` \n'
288          txt += '    else \n'
289 <        txt += '        echo "SET_ENV 1 ==> ERROR in setting CE name - OSG mode -" \n'
289 >        txt += '        echo "SET_CMS_ENV 10099 ==> OSG mode: ERROR in setting CE name from OSG_JOB_CONTACT" \n'
290 >        txt += '        echo "JOB_EXIT_STATUS = 10099" \n'
291 >        txt += '        echo "JobExitCode=10099" | tee -a $RUNTIME_AREA/$repo \n'
292 >        txt += '        dumpStatus $RUNTIME_AREA/$repo \n'
293 >        txt += '        rm -f $RUNTIME_AREA/$repo \n'
294 >        txt += '        echo "MonitorJobID=`echo $MonitorJobID`" | tee -a $RUNTIME_AREA/$repo \n'
295 >        txt += '        echo "MonitorID=`echo $MonitorID`" | tee -a $RUNTIME_AREA/$repo\n'
296          txt += '        exit 1 \n'
297          txt += '    fi \n'
298          txt += 'fi \n'
# Line 286 | Line 322 | class SchedulerEdg(Scheduler):
322   ### changed by georgia (put a loop copying more than one input files per jobs)          
323             txt += '   for input_file in $cur_file_list \n'
324             txt += '   do \n'
325 <           txt += '    lcg-cp --vo $VO lfn:$input_lfn/$input_file file:`pwd`/$input_file 2>&1\n'
326 <           txt += '    copy_input_exit_status=$?\n'
327 <           txt += '    echo "COPY_INPUT_EXIT_STATUS = $copy_input_exit_status"\n'
328 <           txt += '    if [ $copy_input_exit_status -ne 0 ]; then \n'
329 <           txt += '       echo "Problems with copying to WN" \n'
330 <           txt += '    else \n'
331 <           txt += '       echo "input copied into WN" \n'
332 <           txt += '    fi \n'
325 >           #### FEDE
326 >           #txt += '      echo "which lcg-cp" \n'
327 >           #txt += '      which lcg-cp \n'
328 >           #########
329 >           txt += '      lcg-cp --vo $VO --verbose lfn:$input_lfn/$input_file file:`pwd`/$input_file 2>&1\n'
330 >           txt += '      copy_input_exit_status=$?\n'
331 >           txt += '      echo "COPY_INPUT_EXIT_STATUS = $copy_input_exit_status"\n'
332 >           txt += '      if [ $copy_input_exit_status -ne 0 ]; then \n'
333 >           txt += '         echo "Problems with copying to WN" \n'
334 >           txt += '      else \n'
335 >           txt += '         echo "input copied into WN" \n'
336 >           txt += '      fi \n'
337             txt += '   done \n'
338   ### copy a set of PU ntuples (same for each jobs -- but accessed randomly)
339             txt += '   for file in $cur_pu_list \n'
340             txt += '   do \n'
341 <           txt += '    lcg-cp --vo $VO lfn:$pu_lfn/$file file:`pwd`/$file 2>&1\n'
342 <           txt += '    copy_input_exit_status=$?\n'
343 <           txt += '    echo "COPY_INPUT_PU_EXIT_STATUS = $copy_input_pu_exit_status"\n'
344 <           txt += '    if [ $copy_input_pu_exit_status -ne 0 ]; then \n'
345 <           txt += '       echo "Problems with copying pu to WN" \n'
346 <           txt += '    else \n'
347 <           txt += '       echo "input pu files copied into WN" \n'
348 <           txt += '    fi \n'
341 >           #### FEDE
342 >           #txt += '      echo "which lcg-cp" \n'
343 >           #txt += '      which lcg-cp \n'
344 >           #########
345 >           txt += '      lcg-cp --vo $VO --verbose lfn:$pu_lfn/$file file:`pwd`/$file 2>&1\n'
346 >           txt += '      copy_input_exit_status=$?\n'
347 >           txt += '      echo "COPY_INPUT_PU_EXIT_STATUS = $copy_input_pu_exit_status"\n'
348 >           txt += '      if [ $copy_input_pu_exit_status -ne 0 ]; then \n'
349 >           txt += '         echo "Problems with copying pu to WN" \n'
350 >           txt += '      else \n'
351 >           txt += '         echo "input pu files copied into WN" \n'
352 >           txt += '      fi \n'
353             txt += '   done \n'
354             txt += '   \n'
355             txt += '   ### Check SCRATCH space available on WN : \n'
# Line 324 | Line 368 | class SchedulerEdg(Scheduler):
368             txt += '#\n'
369             txt += '#   Copy output to SE = $SE\n'
370             txt += '#\n'
371 <           txt += 'if [ $exe_result -eq 0 ]; then\n'
371 >           #txt += 'if [ $exe_result -eq 0 ]; then\n'
372             txt += '    for out_file in $file_list ; do\n'
373             txt += '        echo "Trying to copy output file to $SE "\n'
374             ## OLI_Daniele globus-* for OSG, lcg-* for LCG
375             txt += '        if [ $middleware == OSG ]; then\n'
376             txt += '           echo "globus-url-copy file://`pwd`/$out_file gsiftp://${SE}${SE_PATH}$out_file"\n'
377 <           txt += '           copy_exit_status=`globus-url-copy file://\`pwd\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1`\n'
378 <           #txt += '           exitstring=`globus-url-copy file://\`pwd\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1`\n'
377 >           txt += '           globus-url-copy file://\`pwd\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1 \n'
378 >           txt += '           copy_exit_status=$? \n'
379             txt += '        elif [ $middleware == LCG ]; then \n'
380 +           txt += '           echo "#######################################" \n'
381 +           txt += '           echo "#######################################" \n'
382 +           txt += '           echo "directory: " \n'
383 +           txt += '           pwd \n'
384 +           txt += '           echo "files:" \n'
385 +           txt += '           ls \n'
386 +           txt += '           echo "#######################################" \n'
387 +           txt += '           echo "#######################################" \n'
388 +           #### FEDE
389 +           #txt += '           echo "which lcg-cp" \n'
390 +           #txt += '           which lcg-cp \n'
391 +           #########
392             txt += '           echo "lcg-cp --vo cms -t 1200 file://`pwd`/$out_file gsiftp://${SE}${SE_PATH}$out_file"\n'
393 <           txt += '           copy_exit_status=`lcg-cp --vo cms -t 1200 file://\`pwd\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1`\n'
394 <           #txt += '           exitstring=`lcg-cp --vo cms -t 30 file://\`pwd\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1`\n'
393 >           txt += '           lcg-cp --vo cms --verbose -t 1200 file://\`pwd\`/$out_file gsiftp://${SE}${SE_PATH}$out_file 2>&1\n'
394 >           txt += '           copy_exit_status=$? \n'
395             txt += '        fi \n'
340           #txt += '        copy_exit_status=$?\n'
396             txt += '        echo "COPY_EXIT_STATUS = $copy_exit_status"\n'
397             txt += '        echo "STAGE_OUT = $copy_exit_status"\n'
398 +           txt += '        exit_status=$copy_exit_status\n'
399             txt += '        if [ $copy_exit_status -ne 0 ]; then\n'
400             txt += '            echo "Problems with SE = $SE"\n'
401             txt += '            echo "StageOutExitStatus = 198" | tee -a $RUNTIME_AREA/$repo\n'
# Line 351 | Line 407 | class SchedulerEdg(Scheduler):
407             txt += '            echo "StageOutExitStatus = 0" | tee -a $RUNTIME_AREA/$repo\n'
408             txt += '         fi\n'
409             txt += '     done\n'
410 <           txt += 'fi\n'
410 >           #txt += 'fi\n'
411          return txt
412  
413      def wsRegisterOutput(self):
# Line 371 | Line 427 | class SchedulerEdg(Scheduler):
427             txt += '#\n'
428             txt += '#  Register output to LFC\n'
429             txt += '#\n'
430 <           txt += '   if [[ $exe_result -eq 0 && $copy_exit_status -eq 0 ]]; then\n'
430 >           #txt += '   if [[ $exe_result -eq 0 && $copy_exit_status -eq 0 ]]; then\n'
431 >           txt += '   if [ $copy_exit_status -eq 0 ]; then\n'
432             txt += '      for out_file in $file_list ; do\n'
433             txt += '         echo "Trying to register the output file into LFC"\n'
434 +           #### FEDE
435 +           #txt += '         echo "which lcg-rf" \n'
436 +           #txt += '         which lcg-rf \n'
437 +           #########
438             txt += '         echo "lcg-rf -l $LFN/$out_file --vo $VO sfn://$SE$SE_PATH/$out_file"\n'
439             txt += '         lcg-rf -l $LFN/$out_file --vo $VO sfn://$SE$SE_PATH/$out_file 2>&1 \n'
440             txt += '         register_exit_status=$?\n'
# Line 382 | Line 443 | class SchedulerEdg(Scheduler):
443             txt += '         if [ $register_exit_status -ne 0 ]; then \n'
444             txt += '            echo "Problems with the registration to LFC" \n'
445             txt += '            echo "Try with srm protocol" \n'
446 +           #### FEDE
447 +           #txt += '            echo "which lcg-rf" \n'
448 +           #txt += '            which lcg-rf \n'
449 +           #########
450             txt += '            echo "lcg-rf -l $LFN/$out_file --vo $VO srm://$SE$SE_PATH/$out_file"\n'
451             txt += '            lcg-rf -l $LFN/$out_file --vo $VO srm://$SE$SE_PATH/$out_file 2>&1 \n'
452             txt += '            register_exit_status=$?\n'
# Line 395 | Line 460 | class SchedulerEdg(Scheduler):
460             txt += '         fi \n'
461             txt += '         echo "StageOutExitStatus = $register_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
462             txt += '      done\n'
463 <           txt += '   elif [[ $exe_result -eq 0 && $copy_exit_status -ne 0 ]]; then \n'
463 >           #txt += '   elif [[ $exe_result -eq 0 && $copy_exit_status -ne 0 ]]; then \n'
464 >           txt += '   else \n'
465             txt += '      echo "Trying to copy output file to CloseSE"\n'
466             txt += '      CLOSE_SE=`edg-brokerinfo getCloseSEs | head -1`\n'
467             txt += '      for out_file in $file_list ; do\n'
468 +           #### FEDE
469 +           #txt += '         echo "which lcg-cr" \n'
470 +           #txt += '         which lcg-cr \n'
471 +           #########
472             txt += '         echo "lcg-cr -v -l lfn:${LFN}/$out_file -d $CLOSE_SE -P $LFN/$out_file --vo $VO file://`pwd`/$out_file" \n'
473             txt += '         lcg-cr -v -l lfn:${LFN}/$out_file -d $CLOSE_SE -P $LFN/$out_file --vo $VO file://`pwd`/$out_file 2>&1 \n'
474             txt += '         register_exit_status=$?\n'
475             txt += '         echo "REGISTER_EXIT_STATUS = $register_exit_status"\n'
476             txt += '         echo "STAGE_OUT = $register_exit_status"\n'
477             txt += '         if [ $register_exit_status -ne 0 ]; then \n'
478 <           txt += '            echo "Problems with CloseSE" \n'
478 >           txt += '            echo "Problems with CloseSE or Catalog" \n'
479             txt += '         else \n'
480             txt += '            echo "The program was successfully executed"\n'
481             txt += '            echo "SE = $CLOSE_SE"\n'
# Line 413 | Line 483 | class SchedulerEdg(Scheduler):
483             txt += '         fi \n'
484             txt += '         echo "StageOutExitStatus = $register_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
485             txt += '      done\n'
486 <           txt += '   else\n'
487 <           txt += '      echo "Problem with the executable"\n'
486 >           #txt += '   else\n'
487 >           #txt += '      echo "Problem with the executable"\n'
488             txt += '   fi \n'
489 +           txt += '   exit_status=$register_exit_status\n'
490             txt += 'fi \n'
491          return txt
492  
# Line 497 | Line 568 | class SchedulerEdg(Scheduler):
568          [sites.append(it) for it in CEs if not sites.count(it)]
569  
570          common.logger.debug(5,"All Sites :"+str(sites))
571 +        common.logger.message("Matched Sites :"+str(sites))
572          return len(sites)
573  
574      def noMatchFound_(self, jdl):
# Line 633 | Line 705 | class SchedulerEdg(Scheduler):
705          jdl.write(jt_string)
706  
707          ### only one .sh  JDL has arguments:
636        firstEvent = common.jobDB.firstEvent(nj)
637        maxEvents = common.jobDB.maxEvents(nj)
638        jdl.write('Arguments = "' + str(nj+1)+' '+str(firstEvent)+' '+str(maxEvents)+'";\n')
708  
709 +        ### Fabio
710 +        jdl.write('Arguments = "' + str(nj+1)+' '+ jbt.getJobTypeArguments(nj, "EDG") +'";\n')
711          inp_box = 'InputSandbox = { '
712          inp_box = inp_box + '"' + script + '",'
713  
# Line 694 | Line 765 | class SchedulerEdg(Scheduler):
765  
766  
767          req='Requirements = '
768 +        noreq=req
769          req = req + jbt.getRequirements()
698 #        ### if at least a CE exists ...
699 #        if common.analisys_common_info['sites']:
700 #           if common.analisys_common_info['sw_version']:
701 #                req='Requirements = '
702 #                req=req + 'Member("VO-cms-' + \
703 #                     common.analisys_common_info['sw_version'] + \
704 #                     '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
705 #            if len(common.analisys_common_info['sites'])>0:
706 #                req = req + ' && ('
707 #                for i in range(len(common.analisys_common_info['sites'])):
708 #                    req = req + 'other.GlueCEInfoHostName == "' \
709 #                         + common.analisys_common_info['sites'][i] + '"'
710 #                    if ( i < (int(len(common.analisys_common_info['sites']) - 1)) ):
711 #                        req = req + ' || '
712 #            req = req + ')'
770          #### and USER REQUIREMENT
771          if self.EDG_requirements:
772 <            if (req == 'Requirement = '):
773 <                req = req + self.EDG_requirements
774 <            else:
718 <                req = req +  ' && ' + self.EDG_requirements
772 >            if (req != noreq):
773 >                req = req +  ' && '
774 >            req = req + self.EDG_requirements
775          #### FEDE #####
776          if self.EDG_ce_white_list:
777              ce_white_list = string.split(self.EDG_ce_white_list,',')
778              #print "req = ", req
779              for i in range(len(ce_white_list)):
780                  if i == 0:
781 <                    if (req == 'Requirement = '):
782 <                        req = req + '((RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))'
783 <                    else:
728 <                        req = req +  ' && ((RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))'
781 >                    if (req != noreq):
782 >                        req = req +  ' && '
783 >                    req = req + '((RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))'
784                      pass
785                  else:
786                      req = req +  ' || (RegExp("' + ce_white_list[i] + '", other.GlueCEUniqueId))'
# Line 734 | Line 789 | class SchedulerEdg(Scheduler):
789          if self.EDG_ce_black_list:
790              ce_black_list = string.split(self.EDG_ce_black_list,',')
791              for ce in ce_black_list:
792 <                if (req == 'Requirement = '):
793 <                    req = req + '(!RegExp("' + ce + '", other.GlueCEUniqueId))'
794 <                else:
740 <                    req = req +  ' && (!RegExp("' + ce + '", other.GlueCEUniqueId))'
792 >                if (req != noreq):
793 >                    req = req +  ' && '
794 >                req = req + '(!RegExp("' + ce + '", other.GlueCEUniqueId))'
795                  pass
796 +
797          ###############
798 +        clockTime=480
799          if self.EDG_clock_time:
800 <            if (req == 'Requirement = '):
801 <                req = req + 'other.GlueCEPolicyMaxWallClockTime>='+self.EDG_clock_time
802 <            else:
803 <                req = req + ' && other.GlueCEPolicyMaxWallClockTime>='+self.EDG_clock_time
800 >            clockTime= self.EDG_clock_time
801 >        if (req != noreq):
802 >            req = req + ' && '
803 >        req = req + '((other.GlueCEPolicyMaxWallClockTime == 0) || (other.GlueCEPolicyMaxWallClockTime>='+str(clockTime)+'))'
804  
805 +        cpuTime=1000
806          if self.EDG_cpu_time:
807 <            if (req == 'Requirement = '):
808 <                req = req + ' other.GlueCEPolicyMaxCPUTime>='+self.EDG_cpu_time
809 <            else:
810 <                req = req + ' && other.GlueCEPolicyMaxCPUTime>='+self.EDG_cpu_time
811 <        if (req != 'Requirement = '):
807 >            cpuTime=self.EDG_cpu_time
808 >        if (req != noreq):
809 >            req = req + ' && '
810 >        req = req + '((other.GlueCEPolicyMaxCPUTime == 0) || (other.GlueCEPolicyMaxCPUTime>='+str(cpuTime)+'))'
811 >
812 >        if (req != noreq):
813              req = req + ';\n'
814              jdl.write(req)
815                                                                                                                                                              
# Line 779 | Line 837 | class SchedulerEdg(Scheduler):
837          #cmd = 'voms-proxy-info -exists -valid '+str(minTimeLeft)+':00'
838          #cmd = 'voms-proxy-info -timeleft'
839          mustRenew = 0
840 <        timeLeftLocal = runCommand('voms-proxy-info -timeleft')
840 >        timeLeftLocal = runCommand('voms-proxy-info -timeleft 2>/dev/null')
841          timeLeftServer = -999
842          if not timeLeftLocal or int(timeLeftLocal) <= 0 or not isInt(timeLeftLocal):
843              mustRenew = 1
844          else:
845 <            timeLeftServer = runCommand('voms-proxy-info -actimeleft | head -1')
845 >            timeLeftServer = runCommand('voms-proxy-info -actimeleft 2>/dev/null | head -1')
846              if not timeLeftServer or not isInt(timeLeftServer):
847                  mustRenew = 1
848              elif timeLeftLocal<minTimeLeft or timeLeftServer<minTimeLeft:
# Line 793 | Line 851 | class SchedulerEdg(Scheduler):
851          pass
852  
853          if mustRenew:
854 <            common.logger.message( "No valid proxy found or timeleft too short!\n Creating a user proxy with default length of 24h\n")
855 <            cmd = 'voms-proxy-init -voms cms -valid 24:00'
854 >            common.logger.message( "No valid proxy found or remaining time of validity of already existing proxy shorter than 10 hours!\n Creating a user proxy with default length of 96h\n")
855 >            cmd = 'voms-proxy-init -voms cms -valid 96:00'
856              try:
857                  # SL as above: damn it!
858                  out = os.system(cmd)

Diff Legend

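Removed lines (no marker; shown with the old revision's line number)
+ Added lines
< Changed lines (text in the old revision)
> Changed lines (text in the new revision)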