ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/COMP/CRAB/python/cms_cmssw.py
(Generate patch)

Comparing COMP/CRAB/python/cms_cmssw.py (file contents):
Revision 1.183 by spiga, Wed Apr 30 18:21:07 2008 UTC vs.
Revision 1.196 by ewv, Wed May 28 16:46:17 2008 UTC

# Line 35 | Line 35 | class Cmssw(JobType):
35          self.executable_arch = self.scram.getArch()
36          self.tgz_name = 'default.tgz'
37          self.scriptName = 'CMSSW.sh'
38 <        self.pset = ''      #scrip use case Da
39 <        self.datasetPath = '' #scrip use case Da
38 >        self.pset = ''
39 >        self.datasetPath = ''
40  
41          # set FJR file name
42          self.fjrFileName = 'crab_fjr.xml'
43  
44          self.version = self.scram.getSWVersion()
45          version_array = self.version.split('_')
46 <        self.major_version = 0
47 <        self.minor_version = 0
46 >        self.CMSSW_major = 0
47 >        self.CMSSW_minor = 0
48 >        self.CMSSW_patch = 0
49          try:
50 <            self.major_version = int(version_array[1])
51 <            self.minor_version = int(version_array[2])
50 >            self.CMSSW_major = int(version_array[1])
51 >            self.CMSSW_minor = int(version_array[2])
52 >            self.CMSSW_patch = int(version_array[3])
53          except:
54 <            msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!"
54 >            msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!"
55              raise CrabException(msg)
56  
55
56        #
57        # Try to block creation in case of arch/version mismatch
58        #
59
60 #        a = string.split(self.version, "_")
61 #
62 #        if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0):
63 #            msg = "Warning: You are using %s version of CMSSW  with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch)
64 #            common.logger.message(msg)
65 #        if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0):
66 #            msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch)
67 #            raise CrabException(msg)
68 #
69
70
57          ### collect Data cards
58  
59          if not cfg_params.has_key('CMSSW.datasetpath'):
# Line 84 | Line 70 | class Cmssw(JobType):
70  
71          self.dataTiers = []
72  
73 +        self.debug_pset = cfg_params.get('USER.debug_pset',False)
74 +
75          ## now the application
76          self.executable = cfg_params.get('CMSSW.executable','cmsRun')
77          log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
# Line 127 | Line 115 | class Cmssw(JobType):
115                  raise CrabException(msg)
116              self.additional_inbox_files.append(string.strip(self.scriptExe))
117  
130        #CarlosDaniele
118          if self.datasetPath == None and self.pset == None and self.scriptExe == '' :
119              msg ="Error. script_exe  not defined"
120              raise CrabException(msg)
# Line 150 | Line 137 | class Cmssw(JobType):
137                      if not os.path.exists(file):
138                          raise CrabException("Additional input file not found: "+file)
139                      pass
153                    # fname = string.split(file, '/')[-1]
154                    # storedFile = common.work_space.pathForTgz()+'share/'+fname
155                    # shutil.copyfile(file, storedFile)
140                      self.additional_inbox_files.append(string.strip(file))
141                  pass
142              pass
# Line 178 | Line 162 | class Cmssw(JobType):
162          if cfg_params.has_key('CMSSW.total_number_of_events'):
163              self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
164              self.selectTotalNumberEvents = 1
165 +            if self.selectNumberOfJobs  == 1:
166 +                if int(self.total_number_of_events) < int(self.theNumberOfJobs):
167 +                    msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
168 +                    raise CrabException(msg)
169          else:
170              self.total_number_of_events = 0
171              self.selectTotalNumberEvents = 0
172  
173 <        if self.pset != None: #CarlosDaniele
173 >        if self.pset != None:
174               if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ):
175                   msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.'
176                   raise CrabException(msg)
# Line 212 | Line 200 | class Cmssw(JobType):
200          if self.sourceSeed:
201              print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds."
202              self.incrementSeeds.append('sourceSeed')
203 +            self.incrementSeeds.append('theSource')
204  
205          self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None)
206          if self.sourceSeedVtx:
# Line 254 | Line 243 | class Cmssw(JobType):
243  
244          ## Select Splitting
245          if self.selectNoInput:
246 <            if self.pset == None: #CarlosDaniele
246 >            if self.pset == None:
247                  self.jobSplittingForScript()
248              else:
249                  self.jobSplittingNoInput()
# Line 262 | Line 251 | class Cmssw(JobType):
251              self.jobSplittingByBlocks(blockSites)
252  
253          # modify Pset
254 <        if self.pset != None: #CarlosDaniele
254 >        if self.pset != None:
255              try:
256                  # Add FrameworkJobReport to parameter-set, set max events.
257                  # Reset later for data jobs by writeCFG which does all modifications
# Line 270 | Line 259 | class Cmssw(JobType):
259                  PsetEdit.maxEvent(self.eventsPerJob)
260                  PsetEdit.psetWriter(self.configFilename())
261              except:
262 <                msg='Error while manipuliating ParameterSet: exiting...'
262 >                msg='Error while manipulating ParameterSet: exiting...'
263                  raise CrabException(msg)
264          self.tgzNameWithPath = self.getTarBall(self.executable)
265  
# Line 303 | Line 292 | class Cmssw(JobType):
292          self.eventsbyfile=self.pubdata.getEventsPerFile()
293  
294          ## get max number of events
295 <        self.maxEvents=self.pubdata.getMaxEvents() ##  self.maxEvents used in Creator.py
295 >        self.maxEvents=self.pubdata.getMaxEvents()
296  
297          ## Contact the DLS and build a list of sites hosting the fileblocks
298          try:
# Line 327 | Line 316 | class Cmssw(JobType):
316  
317          return sites
318  
330  # to Be Removed  DS -- BL
331  #  def setArgsList(self, argsList):
332  #      self.argsList = argsList
333
319      def jobSplittingByBlocks(self, blockSites):
320          """
321          Perform job splitting. Jobs run over an integer number of files
# Line 609 | Line 594 | class Cmssw(JobType):
594          return
595  
596  
597 <    def jobSplittingForScript(self):#CarlosDaniele
597 >    def jobSplittingForScript(self):
598          """
599          Perform job splitting based on number of job
600          """
# Line 625 | Line 610 | class Cmssw(JobType):
610          # argument is seed number.$i
611          self.list_of_args = []
612          for i in range(self.total_number_of_jobs):
628            ## Since there is no input, any site is good
629           # self.jobDestination.append(["Any"])
613              self.jobDestination.append([""])
631            ## no random seed
614              self.list_of_args.append([str(i)])
615          return
616  
617      def split(self, jobParams):
618  
637        #### Fabio
619          njobs = self.total_number_of_jobs
620          arglist = self.list_of_args
621          # create the empty structure
# Line 651 | Line 632 | class Cmssw(JobType):
632              argu=''
633              if len(jobParams[job]):
634                  argu +=   concString.join(jobParams[job] )
635 <            job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS
636 <            job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS
656 <            #common._db.updateJob_(job,job_ToSave)## new BL--DS
635 >            job_ToSave['arguments']= str(job+1)+' '+argu
636 >            job_ToSave['dlsDestination']= self.jobDestination[job]
637              listField.append(job_ToSave)
638              msg="Job "+str(job)+" Arguments:   "+str(job+1)+" "+argu+"\n"  \
639              +"                     Destination: "+str(self.jobDestination[job])
640              common.logger.debug(5,msg)
641 <            #common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job]))
662 <        common._db.updateJob_(listID,listField)## new BL--DS
663 <        ## Pay Attention Here....DS--BL
641 >        common._db.updateJob_(listID,listField)
642          self.argsList = (len(jobParams[0])+1)
643  
644          return
645  
646      def numberOfJobs(self):
669        # Fabio
647          return self.total_number_of_jobs
648  
649      def getTarBall(self, exe):
650          """
651          Return the TarBall with lib and exe
652          """
676
677        # if it exist, just return it
678        #
679        # Marco. Let's start to use relative path for Boss XML files
680        #
653          self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name
654          if os.path.exists(self.tgzNameWithPath):
655              return self.tgzNameWithPath
# Line 691 | Line 663 | class Cmssw(JobType):
663  
664          # First of all declare the user Scram area
665          swArea = self.scram.getSWArea_()
694        #print "swArea = ", swArea
695        # swVersion = self.scram.getSWVersion()
696        # print "swVersion = ", swVersion
666          swReleaseTop = self.scram.getReleaseTop_()
698        #print "swReleaseTop = ", swReleaseTop
667  
668          ## check if working area is release top
669          if swReleaseTop == '' or swArea == swReleaseTop:
# Line 797 | Line 765 | class Cmssw(JobType):
765          Returns part of a job script which prepares
766          the execution environment for the job 'nj'.
767          """
768 +        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
769 +            psetName = 'pset.py'
770 +        else:
771 +            psetName = 'pset.cfg'
772          # Prepare JobType-independent part
773          txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n'
774          txt += 'echo ">>> setup environment"\n'
# Line 815 | Line 787 | class Cmssw(JobType):
787          txt += '    cd $WORKING_DIR\n'
788          txt += '    echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n'
789          txt += self.wsSetupCMSOSGEnvironment_()
818        #txt += '    echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n'
819        #txt += '    export SCRAM_ARCH='+self.executable_arch+'\n'
790          txt += 'fi\n'
791  
792          # Prepare JobType-specific part
# Line 855 | Line 825 | class Cmssw(JobType):
825  
826          # Prepare job-specific part
827          job = common.job_list[nj]
858        ### FEDE FOR DBS OUTPUT PUBLICATION
828          if (self.datasetPath):
829              txt += '\n'
830              txt += 'DatasetPath='+self.datasetPath+'\n'
# Line 891 | Line 860 | class Cmssw(JobType):
860                      txt += 'FirstRun=${args[1]}; export FirstRun\n'
861                      txt += 'echo "FirstRun: <$FirstRun>"\n'
862  
863 <            txt += 'mv -f '+pset+' pset.cfg\n'
863 >            txt += 'mv -f ' + pset + ' ' + psetName + '\n'
864  
865  
866          if self.pset != None:
867 +            # FUTURE: Can simply for 2_1_x and higher
868              txt += '\n'
869 <            txt += 'echo "***** cat pset.cfg *********"\n'
870 <            txt += 'cat pset.cfg\n'
871 <            txt += 'echo "****** end pset.cfg ********"\n'
872 <            txt += '\n'
873 <            txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n'
869 >            if self.debug_pset==True:
870 >                txt += 'echo "***** cat ' + psetName + ' *********"\n'
871 >                txt += 'cat ' + psetName + '\n'
872 >                txt += 'echo "****** end ' + psetName + ' ********"\n'
873 >                txt += '\n'
874 >            txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n'
875              txt += 'echo "PSETHASH = $PSETHASH" \n'
876              txt += '\n'
877          return txt
# Line 951 | Line 922 | class Cmssw(JobType):
922          txt += 'rm -r lib/ module/ \n'
923          txt += 'mv $RUNTIME_AREA/lib/ . \n'
924          txt += 'mv $RUNTIME_AREA/module/ . \n'
925 <        if self.dataExist == True: txt += 'mv $RUNTIME_AREA/src/ . \n'
925 >        if self.dataExist == True:
926 >            txt += 'rm -r src/ \n'
927 >            txt += 'mv $RUNTIME_AREA/src/ . \n'
928          if len(self.additional_inbox_files)>0:
929              for file in self.additional_inbox_files:
930 <                txt += 'mv $RUNTIME_AREA/'+file+' . \n'
930 >                txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n'
931          txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n'
932  
933          txt += 'if [ -z "$PYTHONPATH" ]; then\n'
# Line 974 | Line 947 | class Cmssw(JobType):
947          """
948  
949      def executableName(self):
950 <        if self.scriptExe: #CarlosDaniele
950 >        if self.scriptExe:
951              return "sh "
952          else:
953              return self.executable
# Line 987 | Line 960 | class Cmssw(JobType):
960              ex_args = ""
961              # FUTURE: This tests the CMSSW version. Can remove code as versions deprecated
962              # Framework job report
963 <            if (self.major_version >= 1 and self.minor_version >= 5) or (self.major_version >= 2):
963 >            if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2):
964                  ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml"
965 <            # Type of cfg file
966 <            if self.major_version >= 2 :
965 >            # Type of config file
966 >            if self.CMSSW_major >= 2 :
967                  ex_args += " -p pset.py"
968              else:
969                  ex_args += " -p pset.cfg"
# Line 1001 | Line 974 | class Cmssw(JobType):
974          Returns a list of filenames to be put in JDL input sandbox.
975          """
976          inp_box = []
1004        # # dict added to delete duplicate from input sandbox file list
1005        # seen = {}
1006        ## code
977          if os.path.isfile(self.tgzNameWithPath):
978              inp_box.append(self.tgzNameWithPath)
979          wrapper = os.path.basename(str(common._db.queryTask('scriptName')))
# Line 1100 | Line 1070 | class Cmssw(JobType):
1070              req='Member("VO-cms-' + \
1071                   self.version + \
1072                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1073 <        ## SL add requirement for OS version only if SL4
1104 <        #reSL4 = re.compile( r'slc4' )
1105 <        if self.executable_arch: # and reSL4.search(self.executable_arch):
1073 >        if self.executable_arch:
1074              req+=' && Member("VO-cms-' + \
1075                   self.executable_arch + \
1076                   '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
# Line 1116 | Line 1084 | class Cmssw(JobType):
1084      def configFilename(self):
1085          """ return the config filename """
1086          # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
1087 <        if (self.major_version >= 2 and self.minor_version >= 1) or (self.major_version >= 3):
1087 >        if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
1088            return self.name()+'.py'
1089          else:
1090            return self.name()+'.cfg'
# Line 1145 | Line 1113 | class Cmssw(JobType):
1113  
1114          return txt
1115  
1148    ### OLI_DANIELE
1116      def wsSetupCMSLCGEnvironment_(self):
1117          """
1118          Returns part of a job script which is prepares
# Line 1180 | Line 1147 | class Cmssw(JobType):
1147          txt += '    echo "==> setup cms environment ok"\n'
1148          return txt
1149  
1183    ### FEDE FOR DBS OUTPUT PUBLICATION
1150      def modifyReport(self, nj):
1151          """
1152          insert the part of the script that modifies the FrameworkJob Report
1153          """
1188
1154          txt = '\n#Written by cms_cmssw::modifyReport\n'
1155          publish_data = int(self.cfg_params.get('USER.publish_data',0))
1156          if (publish_data == 1):
# Line 1221 | Line 1186 | class Cmssw(JobType):
1186              txt += 'fi\n'
1187          return txt
1188  
1189 +    def wsParseFJR(self):
1190 +        """
1191 +        Parse the FrameworkJobReport to obtain useful infos
1192 +        """
1193 +        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1194 +        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1195 +        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1196 +        txt += '    if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1197 +        txt += '        cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --MonitorID $MonitorID --MonitorJobID $MonitorJobID`\n'
1198 +        txt += '        echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1199 +        txt += '        tmp_executable_exit_status=`echo $cmd_out | awk -F\; \'{print $1}\' | awk -F \' \' \'{print $NF}\'`\n'
1200 +        txt += '        if [ -n $tmp_executable_exit_status ];then\n'
1201 +        txt += '            executable_exit_status=$tmp_executable_exit_status\n'
1202 +        txt += '        fi\n'
1203 +        txt += '        if [ $executable_exit_status -eq 50115 ];then\n'
1204 +        txt += '            echo ">>> crab_fjr.xml contents: "\n'
1205 +        txt += '            cat $RUNTIME_AREA/crab_fjr_NJob.xml\n'
1206 +        txt += '            echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1207 +        txt += '        else\n'
1208 +        txt += '            echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1209 +        txt += '        fi\n'
1210 +        txt += '    else\n'
1211 +        txt += '        echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1212 +        txt += '    fi\n'
1213 +          #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap
1214 +
1215 +        if self.datasetPath:
1216 +          # VERIFY PROCESSED DATA
1217 +            txt += '    if [ $executable_exit_status -eq 0 ];then\n'
1218 +            txt += '      echo ">>> Verify list of processed files:"\n'
1219 +            txt += '      echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1220 +            txt += '      grep LFN $RUNTIME_AREA/crab_fjr_$NJob.xml |cut -d">" -f2|cut -d"<" -f1|grep "/" > processed-files.txt\n'
1221 +            txt += '      cat input-files.txt  | sort | uniq > tmp.txt\n'
1222 +            txt += '      mv tmp.txt input-files.txt\n'
1223 +            txt += '      echo "cat input-files.txt"\n'
1224 +            txt += '      echo "----------------------"\n'
1225 +            txt += '      cat input-files.txt\n'
1226 +            txt += '      cat processed-files.txt | sort | uniq > tmp.txt\n'
1227 +            txt += '      mv tmp.txt processed-files.txt\n'
1228 +            txt += '      echo "----------------------"\n'
1229 +            txt += '      echo "cat processed-files.txt"\n'
1230 +            txt += '      echo "----------------------"\n'
1231 +            txt += '      cat processed-files.txt\n'
1232 +            txt += '      echo "----------------------"\n'
1233 +            txt += '      diff -q input-files.txt processed-files.txt\n'
1234 +            txt += '      fileverify_status=$?\n'
1235 +            txt += '      if [ $fileverify_status -ne 0 ]; then\n'
1236 +            txt += '         executable_exit_status=30001\n'
1237 +            txt += '         echo "ERROR ==> not all input files processed"\n'
1238 +            txt += '         echo "      ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
1239 +            txt += '         echo "      ==> diff input-files.txt processed-files.txt"\n'
1240 +            txt += '      fi\n'
1241 +            txt += '    fi\n'
1242 +            txt += '\n'
1243 +        txt += 'else\n'
1244 +        txt += '    echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
1245 +        txt += 'fi\n'
1246 +        txt += '\n'
1247 +        txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
1248 +        txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
1249 +        txt += 'job_exit_code=$executable_exit_status\n'
1250 +
1251 +        return txt
1252 +
1253      def setParam_(self, param, value):
1254          self._params[param] = value
1255  

Diff Legend

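  Removed lines (no marker; only the old revision's line number is shown)
+ Added lines (present only in the new revision)
< Changed lines (text as in the old revision, 1.183)
> Changed lines (text as in the new revision, 1.196)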