ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.178 by spiga, Sun Apr 20 09:34:40 2008 UTC vs.
Revision 1.187 by spiga, Mon May 26 16:53:39 2008 UTC

# Line 34 | Line 34 | class Cmssw(JobType):
34          self.executable = ''
35          self.executable_arch = self.scram.getArch()
36          self.tgz_name = 'default.tgz'
37        self.additional_tgz_name = 'additional.tgz'
37          self.scriptName = 'CMSSW.sh'
38 <        self.pset = ''      #scrip use case Da
39 <        self.datasetPath = '' #scrip use case Da
38 >        self.pset = ''  
39 >        self.datasetPath = ''
40  
41          # set FJR file name
42          self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45 <
46 <        #
47 <        # Try to block creation in case of arch/version mismatch
48 <        #
49 <
50 < #        a = string.split(self.version, "_")
51 < #
52 < #        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
53 < #            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
54 < #            common.logger.message(msg)
55 < #        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
57 < #            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
58 < #            raise CrabException(msg)
59 < #
60 <
45 >        version_array = self.version.split('_')
46 >        self.CMSSW_major = 0
47 >        self.CMSSW_minor = 0
48 >        self.CMSSW_patch = 0
49 >        try:
50 >            self.CMSSW_major = int(version_array[1])
51 >            self.CMSSW_minor = int(version_array[2])
52 >            self.CMSSW_patch = int(version_array[3])
53 >        except:
54 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
55 >            raise CrabException(msg)
56  
57          ### collect Data cards
58  
# Line 118 | Line 113 | class Cmssw(JobType):
113                  raise CrabException(msg)
114              self.additional_inbox_files.append(string.strip(self.scriptExe))
115  
121        #CarlosDaniele
116          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
117              msg ="Error. script_exe  not defined"
118              raise CrabException(msg)
# Line 141 | Line 135 | class Cmssw(JobType):
135                      if not os.path.exists(file):
136                          raise CrabException("Additional input file not found: "+file)
137                      pass
144                    # fname = string.split(file, '/')[-1]
145                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
146                    # shutil.copyfile(file, storedFile)
138                      self.additional_inbox_files.append(string.strip(file))
139                  pass
140              pass
# Line 173 | Line 164 | class Cmssw(JobType):
164              self.total_number_of_events = 0
165              self.selectTotalNumberEvents = 0
166  
167 <        if self.pset != None: #CarlosDaniele
167 >        if self.pset != None:
168               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
169                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
170                   raise CrabException(msg)
# Line 203 | Line 194 | class Cmssw(JobType):
194          if self.sourceSeed:
195              print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
196              self.incrementSeeds.append('sourceSeed')
197 +            self.incrementSeeds.append('theSource')
198  
199          self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
200          if self.sourceSeedVtx:
# Line 242 | Line 234 | class Cmssw(JobType):
234              blockSites = self.DataDiscoveryAndLocation(cfg_params)
235          #DBSDLS-end
236  
245        self.tgzNameWithPath = self.getTarBall(self.executable)
237  
238          ## Select Splitting
239          if self.selectNoInput:
240 <            if self.pset == None: #CarlosDaniele
240 >            if self.pset == None:
241                  self.jobSplittingForScript()
242              else:
243                  self.jobSplittingNoInput()
# Line 254 | Line 245 | class Cmssw(JobType):
245              self.jobSplittingByBlocks(blockSites)
246  
247          # modify Pset
248 <        if self.pset != None: #CarlosDaniele
248 >        if self.pset != None:
249              try:
250                  # Add FrameworkJobReport to parameter-set, set max events.
251                  # Reset later for data jobs by writeCFG which does all modifications
252 <                PsetEdit.addCrabFJR(self.fjrFileName)
252 >                PsetEdit.addCrabFJR(self.fjrFileName) # FUTURE: Job report addition not needed by CMSSW>1.5
253                  PsetEdit.maxEvent(self.eventsPerJob)
254                  PsetEdit.psetWriter(self.configFilename())
255              except:
256 <                msg='Error while manipuliating ParameterSet: exiting...'
256 >                msg='Error while manipulating ParameterSet: exiting...'
257                  raise CrabException(msg)
258 +        self.tgzNameWithPath = self.getTarBall(self.executable)
259  
260      def DataDiscoveryAndLocation(self, cfg_params):
261  
# Line 294 | Line 286 | class Cmssw(JobType):
286          self.eventsbyfile=self.pubdata.getEventsPerFile()
287  
288          ## get max number of events
289 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
289 >        self.maxEvents=self.pubdata.getMaxEvents()
290  
291          ## Contact the DLS and build a list of sites hosting the fileblocks
292          try:
# Line 318 | Line 310 | class Cmssw(JobType):
310  
311          return sites
312  
321  # to Be Removed  DS -- BL
322  #  def setArgsList(self, argsList):
323  #      self.argsList = argsList
324
313      def jobSplittingByBlocks(self, blockSites):
314          """
315          Perform job splitting. Jobs run over an integer number of files
# Line 600 | Line 588 | class Cmssw(JobType):
588          return
589  
590  
591 <    def jobSplittingForScript(self):#CarlosDaniele
591 >    def jobSplittingForScript(self):
592          """
593          Perform job splitting based on number of job
594          """
# Line 616 | Line 604 | class Cmssw(JobType):
604          # argument is seed number.$i
605          self.list_of_args = []
606          for i in range(self.total_number_of_jobs):
619            ## Since there is no input, any site is good
620           # self.jobDestination.append(["Any"])
607              self.jobDestination.append([""])
622            ## no random seed
608              self.list_of_args.append([str(i)])
609          return
610  
611      def split(self, jobParams):
612  
628        #### Fabio
613          njobs = self.total_number_of_jobs
614          arglist = self.list_of_args
615          # create the empty structure
# Line 642 | Line 626 | class Cmssw(JobType):
626              argu=''
627              if len(jobParams[job]):
628                  argu +=   concString.join(jobParams[job] )
629 <            job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS
630 <            job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
647 <            #common._db.updateJob_(job,job_ToSave)## new BL--DS
629 >            job_ToSave['arguments']= str(job+1)+' '+argu
630 >            job_ToSave['dlsDestination']= self.jobDestination[job]
631              listField.append(job_ToSave)
632              msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
633              +"                     Destination: "+str(self.jobDestination[job])
634              common.logger.debug(5,msg)
635 <            #common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
636 <        common._db.updateJob_(listID,listField)## new BL--DS
654 <        ## Pay Attention Here....DS--BL
655 <        self.argsList = (len(jobParams[1])+1)
635 >        common._db.updateJob_(listID,listField)
636 >        self.argsList = (len(jobParams[0])+1)
637  
638          return
639  
640      def numberOfJobs(self):
660        # Fabio
641          return self.total_number_of_jobs
642  
643      def getTarBall(self, exe):
644          """
645          Return the TarBall with lib and exe
646          """
667
668        # if it exist, just return it
669        #
670        # Marco. Let's start to use relative path for Boss XML files
671        #
647          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
648          if os.path.exists(self.tgzNameWithPath):
649              return self.tgzNameWithPath
# Line 682 | Line 657 | class Cmssw(JobType):
657  
658          # First of all declare the user Scram area
659          swArea = self.scram.getSWArea_()
685        #print "swArea = ", swArea
686        # swVersion = self.scram.getSWVersion()
687        # print "swVersion = ", swVersion
660          swReleaseTop = self.scram.getReleaseTop_()
689        #print "swReleaseTop = ", swReleaseTop
661  
662          ## check if working area is release top
663          if swReleaseTop == '' or swArea == swReleaseTop:
# Line 733 | Line 704 | class Cmssw(JobType):
704  
705              ## Now check if any data dir(s) is present
706              swAreaLen=len(swArea)
707 +            self.dataExist = False
708              for root, dirs, files in os.walk(swArea):
709                  if "data" in dirs:
710 +                    self.dataExist=True
711                      common.logger.debug(5,"data "+root+"/data"+" to be tarred")
712                      tar.add(root+"/data",root[swAreaLen:]+"/data")
713  
714 +            ### CMSSW ParameterSet
715 +            if not self.pset is None:
716 +                cfg_file = common.work_space.jobDir()+self.configFilename()
717 +                tar.add(cfg_file,self.configFilename())
718 +                common.logger.debug(5,"File added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
719 +
720  
721              ## Add ProdCommon dir to tar
722              prodcommonDir = 'ProdCommon'
723              prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
724              if os.path.isdir(prodcommonPath):
725                  tar.add(prodcommonPath,prodcommonDir)
726 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
727  
728 +            ##### ML stuff
729 +            ML_file_list=['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py']
730 +            path=os.environ['CRABDIR'] + '/python/'
731 +            for file in ML_file_list:
732 +                tar.add(path+file,file)
733              common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
734 +
735 +            ##### Utils
736 +            Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']
737 +            for file in Utils_file_list:
738 +                tar.add(path+file,file)
739 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
740 +
741 +            ##### AdditionalFiles
742 +            for file in self.additional_inbox_files:
743 +                tar.add(file,string.split(file,'/')[-1])
744 +            common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
745 +
746              tar.close()
747          except :
748              raise CrabException('Could not create tar-ball')
# Line 756 | Line 753 | class Cmssw(JobType):
753              raise CrabException('Input sandbox size of ' + str(float(tarballinfo.st_size)/1024.0/1024.0) + ' MB is larger than the allowed ' + str(self.MaxTarBallSize) + ' MB input sandbox limit and not supported by the used GRID submission system. Please make sure that no unnecessary files are in all data directories in your local CMSSW project area as they are automatically packed into the input sandbox.')
754  
755          ## create tar-ball with ML stuff
759        self.MLtgzfile =  common.work_space.pathForTgz()+'share/MLfiles.tgz'
760        try:
761            tar = tarfile.open(self.MLtgzfile, "w:gz")
762            path=os.environ['CRABDIR'] + '/python/'
763            for file in ['report.py', 'DashboardAPI.py', 'Logger.py', 'ProcInfo.py', 'apmon.py', 'parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']:
764                tar.add(path+file,file)
765            common.logger.debug(5,"Files added to "+self.MLtgzfile+" : "+str(tar.getnames()))
766            tar.close()
767        except :
768            raise CrabException('Could not create ML files tar-ball')
769
770        return
771
772    def additionalInputFileTgz(self):
773        """
774        Put all additional files into a tar ball and return its name
775        """
776        import tarfile
777        tarName=  common.work_space.pathForTgz()+'share/'+self.additional_tgz_name
778        tar = tarfile.open(tarName, "w:gz")
779        for file in self.additional_inbox_files:
780            tar.add(file,string.split(file,'/')[-1])
781        common.logger.debug(5,"Files added to "+self.additional_tgz_name+" : "+str(tar.getnames()))
782        tar.close()
783        return tarName
756  
757      def wsSetupEnvironment(self, nj=0):
758          """
759          Returns part of a job script which prepares
760          the execution environment for the job 'nj'.
761          """
762 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
763 +            psetName = 'pset.py'
764 +        else:
765 +            psetName = 'pset.cfg'
766          # Prepare JobType-independent part
767          txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
768          txt += 'echo ">>> setup environment"\n'
# Line 805 | Line 781 | class Cmssw(JobType):
781          txt += '    cd $WORKING_DIR\n'
782          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
783          txt += self.wsSetupCMSOSGEnvironment_()
808        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
809        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
784          txt += 'fi\n'
785  
786          # Prepare JobType-specific part
# Line 822 | Line 796 | class Cmssw(JobType):
796          txt += '    func_exit\n'
797          txt += 'fi \n'
798          txt += 'cd '+self.version+'\n'
825        ########## FEDE FOR DBS2 ######################
799          txt += 'SOFTWARE_DIR=`pwd`\n'
800          txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
828        ###############################################
829        ### needed grep for bug in scramv1 ###
801          txt += 'eval `'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME`\n'
802 +        txt += 'if [ $? != 0 ] ; then\n'
803 +        txt += '    echo "ERROR ==> Problem with the command: "\n'
804 +        txt += '    echo "eval \`'+scram+' runtime -sh | grep -v SCRAMRT_LSB_JOBNAME \` at `hostname`"\n'
805 +        txt += '    job_exit_code=10034\n'
806 +        txt += '    func_exit\n'
807 +        txt += 'fi \n'
808          # Handle the arguments:
809          txt += "\n"
810          txt += "## number of arguments (first argument always jobnumber)\n"
# Line 842 | Line 819 | class Cmssw(JobType):
819  
820          # Prepare job-specific part
821          job = common.job_list[nj]
845        ### FEDE FOR DBS OUTPUT PUBLICATION
822          if (self.datasetPath):
823              txt += '\n'
824              txt += 'DatasetPath='+self.datasetPath+'\n'
# Line 878 | Line 854 | class Cmssw(JobType):
854                      txt += 'FirstRun=${args[1]}; export FirstRun\n'
855                      txt += 'echo "FirstRun: <$FirstRun>"\n'
856  
857 <            txt += 'mv -f '+pset+' pset.cfg\n'
857 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
858  
883        if len(self.additional_inbox_files) > 0:
884            txt += 'if [ -e $RUNTIME_AREA/'+self.additional_tgz_name+' ] ; then\n'
885            txt += '  tar xzvf $RUNTIME_AREA/'+self.additional_tgz_name+'\n'
886            txt += 'fi\n'
887            pass
859  
860          if self.pset != None:
861 +            # FUTURE: Can simplify for 2_1_x and higher
862              txt += '\n'
863 <            txt += 'echo "***** cat pset.cfg *********"\n'
864 <            txt += 'cat pset.cfg\n'
865 <            txt += 'echo "****** end pset.cfg ********"\n'
863 >            txt += 'echo "***** cat ' + psetName + ' *********"\n'
864 >            txt += 'cat ' + psetName + '\n'
865 >            txt += 'echo "****** end ' + psetName + ' ********"\n'
866              txt += '\n'
867 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
867 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
868              txt += 'echo "PSETHASH = $PSETHASH" \n'
869              txt += '\n'
870          return txt
# Line 908 | Line 880 | class Cmssw(JobType):
880          if os.path.isfile(self.tgzNameWithPath):
881              txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
882              txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
883 +            txt += 'ls -Al \n'
884              txt += 'untar_status=$? \n'
885              txt += 'if [ $untar_status -ne 0 ]; then \n'
886              txt += '   echo "ERROR ==> Untarring .tgz file failed"\n'
# Line 942 | Line 915 | class Cmssw(JobType):
915          txt += 'rm -r lib/ module/ \n'
916          txt += 'mv $RUNTIME_AREA/lib/ . \n'
917          txt += 'mv $RUNTIME_AREA/module/ . \n'
918 +        if self.dataExist == True:
919 +            txt += 'rm -r src/ \n'
920 +            txt += 'mv $RUNTIME_AREA/src/ . \n'
921 +        if len(self.additional_inbox_files)>0:
922 +            for file in self.additional_inbox_files:
923 +                txt += 'mv $RUNTIME_AREA/'+file+' . \n'
924          txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
925  
926          txt += 'if [ -z "$PYTHONPATH" ]; then\n'
# Line 961 | Line 940 | class Cmssw(JobType):
940          """
941  
942      def executableName(self):
943 <        if self.scriptExe: #CarlosDaniele
943 >        if self.scriptExe:
944              return "sh "
945          else:
946              return self.executable
# Line 971 | Line 950 | class Cmssw(JobType):
950          if self.scriptExe:#CarlosDaniele
951              return   self.scriptExe + " $NJob"
952          else:
974            version_array = self.scram.getSWVersion().split('_')
975            major = 0
976            minor = 0
977            try:
978                major = int(version_array[1])
979                minor = int(version_array[2])
980            except:
981                msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
982                raise CrabException(msg)
983
953              ex_args = ""
954              # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
955              # Framework job report
956 <            if major >= 1 and minor >= 5 :
956 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
957                  ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
958 <            # Type of cfg file
959 <            if major >= 2 :
958 >            # Type of config file
959 >            if self.CMSSW_major >= 2 :
960                  ex_args += " -p pset.py"
961              else:
962                  ex_args += " -p pset.cfg"
# Line 998 | Line 967 | class Cmssw(JobType):
967          Returns a list of filenames to be put in JDL input sandbox.
968          """
969          inp_box = []
1001        # # dict added to delete duplicate from input sandbox file list
1002        # seen = {}
1003        ## code
970          if os.path.isfile(self.tgzNameWithPath):
971              inp_box.append(self.tgzNameWithPath)
1006        if os.path.isfile(self.MLtgzfile):
1007            inp_box.append(self.MLtgzfile)
1008        ## config
1009        if not self.pset is None:
1010            inp_box.append(common.work_space.pathForTgz() + 'job/' + self.configFilename())
1011        ## additional input files
1012        tgz = self.additionalInputFileTgz()
1013        inp_box.append(tgz)
1014        ## executable
972          wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
973          inp_box.append(common.work_space.pathForTgz() +'job/'+ wrapper)
974          return inp_box
# Line 1106 | Line 1063 | class Cmssw(JobType):
1063              req='Member("VO-cms-' + \
1064                   self.version + \
1065                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1066 <        ## SL add requirement for OS version only if SL4
1110 <        #reSL4 = re.compile( r'slc4' )
1111 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1066 >        if self.executable_arch:
1067              req+=' && Member("VO-cms-' + \
1068                   self.executable_arch + \
1069                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
# Line 1121 | Line 1076 | class Cmssw(JobType):
1076  
1077      def configFilename(self):
1078          """ return the config filename """
1079 <        return self.name()+'.cfg'
1079 >        # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1080 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1081 >          return self.name()+'.py'
1082 >        else:
1083 >          return self.name()+'.cfg'
1084  
1085      def wsSetupCMSOSGEnvironment_(self):
1086          """
# Line 1147 | Line 1106 | class Cmssw(JobType):
1106  
1107          return txt
1108  
1150    ### OLI_DANIELE
1109      def wsSetupCMSLCGEnvironment_(self):
1110          """
1111          Returns part of a job script which prepares
# Line 1182 | Line 1140 | class Cmssw(JobType):
1140          txt += '    echo "==> setup cms environment ok"\n'
1141          return txt
1142  
1185    ### FEDE FOR DBS OUTPUT PUBLICATION
1143      def modifyReport(self, nj):
1144          """
1145          insert the part of the script that modifies the FrameworkJob Report
# Line 1201 | Line 1158 | class Cmssw(JobType):
1158              txt += '    SE=""\n'
1159              txt += '    SE_PATH=""\n'
1160              txt += 'fi\n'
1161 <            
1161 >
1162              txt += 'echo ">>> Modify Job Report:" \n'
1163              txt += 'chmod a+x $SOFTWARE_DIR/ProdCommon/ProdCommon/FwkJobRep/ModifyJobReport.py\n'
1164              txt += 'ProcessedDataset='+processedDataset+'\n'

Diff Legend

Removed lines
+ Added lines
< Changed lines (old revision, 1.178)
> Changed lines (new revision, 1.187)