35 |
|
self.executable_arch = self.scram.getArch() |
36 |
|
self.tgz_name = 'default.tgz' |
37 |
|
self.scriptName = 'CMSSW.sh' |
38 |
< |
self.pset = '' #scrip use case Da |
39 |
< |
self.datasetPath = '' #scrip use case Da |
38 |
> |
self.pset = '' |
39 |
> |
self.datasetPath = '' |
40 |
|
|
41 |
|
# set FJR file name |
42 |
|
self.fjrFileName = 'crab_fjr.xml' |
43 |
|
|
44 |
|
self.version = self.scram.getSWVersion() |
45 |
|
version_array = self.version.split('_') |
46 |
< |
self.major_version = 0 |
47 |
< |
self.minor_version = 0 |
46 |
> |
self.CMSSW_major = 0 |
47 |
> |
self.CMSSW_minor = 0 |
48 |
> |
self.CMSSW_patch = 0 |
49 |
|
try: |
50 |
< |
self.major_version = int(version_array[1]) |
51 |
< |
self.minor_version = int(version_array[2]) |
50 |
> |
self.CMSSW_major = int(version_array[1]) |
51 |
> |
self.CMSSW_minor = int(version_array[2]) |
52 |
> |
self.CMSSW_patch = int(version_array[3]) |
53 |
|
except: |
54 |
< |
msg = "Cannot parse CMSSW version string: " + "_".join(version_array) + " for major and minor release number!" |
54 |
> |
msg = "Cannot parse CMSSW version string: " + self.version + " for major and minor release number!" |
55 |
|
raise CrabException(msg) |
56 |
|
|
55 |
– |
|
56 |
– |
# |
57 |
– |
# Try to block creation in case of arch/version mismatch |
58 |
– |
# |
59 |
– |
|
60 |
– |
# a = string.split(self.version, "_") |
61 |
– |
# |
62 |
– |
# if int(a[1]) == 1 and (int(a[2]) < 5 and self.executable_arch.find('slc4') == 0): |
63 |
– |
# msg = "Warning: You are using %s version of CMSSW with %s architecture. \n--> Did you compile your libraries with SLC3? Otherwise you can find some problems running on SLC4 Grid nodes.\n"%(self.version, self.executable_arch) |
64 |
– |
# common.logger.message(msg) |
65 |
– |
# if int(a[1]) == 1 and (int(a[2]) >= 5 and self.executable_arch.find('slc3') == 0): |
66 |
– |
# msg = "Error: CMS does not support %s with %s architecture"%(self.version, self.executable_arch) |
67 |
– |
# raise CrabException(msg) |
68 |
– |
# |
69 |
– |
|
70 |
– |
|
57 |
|
### collect Data cards |
58 |
|
|
59 |
|
if not cfg_params.has_key('CMSSW.datasetpath'): |
70 |
|
|
71 |
|
self.dataTiers = [] |
72 |
|
|
73 |
+ |
self.debug_pset = cfg_params.get('USER.debug_pset',False) |
74 |
+ |
|
75 |
|
## now the application |
76 |
|
self.executable = cfg_params.get('CMSSW.executable','cmsRun') |
77 |
|
log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable) |
115 |
|
raise CrabException(msg) |
116 |
|
self.additional_inbox_files.append(string.strip(self.scriptExe)) |
117 |
|
|
130 |
– |
#CarlosDaniele |
118 |
|
if self.datasetPath == None and self.pset == None and self.scriptExe == '' : |
119 |
|
msg ="Error. script_exe not defined" |
120 |
|
raise CrabException(msg) |
137 |
|
if not os.path.exists(file): |
138 |
|
raise CrabException("Additional input file not found: "+file) |
139 |
|
pass |
153 |
– |
# fname = string.split(file, '/')[-1] |
154 |
– |
# storedFile = common.work_space.pathForTgz()+'share/'+fname |
155 |
– |
# shutil.copyfile(file, storedFile) |
140 |
|
self.additional_inbox_files.append(string.strip(file)) |
141 |
|
pass |
142 |
|
pass |
162 |
|
if cfg_params.has_key('CMSSW.total_number_of_events'): |
163 |
|
self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events']) |
164 |
|
self.selectTotalNumberEvents = 1 |
165 |
+ |
if self.selectNumberOfJobs == 1: |
166 |
+ |
if int(self.total_number_of_events) < int(self.theNumberOfJobs): |
167 |
+ |
msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs ' |
168 |
+ |
raise CrabException(msg) |
169 |
|
else: |
170 |
|
self.total_number_of_events = 0 |
171 |
|
self.selectTotalNumberEvents = 0 |
172 |
|
|
173 |
< |
if self.pset != None: #CarlosDaniele |
173 |
> |
if self.pset != None: |
174 |
|
if ( (self.selectTotalNumberEvents + self.selectEventsPerJob + self.selectNumberOfJobs) != 2 ): |
175 |
|
msg = 'Must define exactly two of total_number_of_events, events_per_job, or number_of_jobs.' |
176 |
|
raise CrabException(msg) |
200 |
|
if self.sourceSeed: |
201 |
|
print "pythia_seed is a deprecated parameter. Use preserve_seeds or increment_seeds in the future.\n","Added to increment_seeds." |
202 |
|
self.incrementSeeds.append('sourceSeed') |
203 |
+ |
self.incrementSeeds.append('theSource') |
204 |
|
|
205 |
|
self.sourceSeedVtx = cfg_params.get('CMSSW.vtx_seed',None) |
206 |
|
if self.sourceSeedVtx: |
243 |
|
|
244 |
|
## Select Splitting |
245 |
|
if self.selectNoInput: |
246 |
< |
if self.pset == None: #CarlosDaniele |
246 |
> |
if self.pset == None: |
247 |
|
self.jobSplittingForScript() |
248 |
|
else: |
249 |
|
self.jobSplittingNoInput() |
251 |
|
self.jobSplittingByBlocks(blockSites) |
252 |
|
|
253 |
|
# modify Pset |
254 |
< |
if self.pset != None: #CarlosDaniele |
254 |
> |
if self.pset != None: |
255 |
|
try: |
256 |
|
# Add FrameworkJobReport to parameter-set, set max events. |
257 |
|
# Reset later for data jobs by writeCFG which does all modifications |
259 |
|
PsetEdit.maxEvent(self.eventsPerJob) |
260 |
|
PsetEdit.psetWriter(self.configFilename()) |
261 |
|
except: |
262 |
< |
msg='Error while manipuliating ParameterSet: exiting...' |
262 |
> |
msg='Error while manipulating ParameterSet: exiting...' |
263 |
|
raise CrabException(msg) |
264 |
|
self.tgzNameWithPath = self.getTarBall(self.executable) |
265 |
|
|
292 |
|
self.eventsbyfile=self.pubdata.getEventsPerFile() |
293 |
|
|
294 |
|
## get max number of events |
295 |
< |
self.maxEvents=self.pubdata.getMaxEvents() ## self.maxEvents used in Creator.py |
295 |
> |
self.maxEvents=self.pubdata.getMaxEvents() |
296 |
|
|
297 |
|
## Contact the DLS and build a list of sites hosting the fileblocks |
298 |
|
try: |
316 |
|
|
317 |
|
return sites |
318 |
|
|
330 |
– |
# to Be Removed DS -- BL |
331 |
– |
# def setArgsList(self, argsList): |
332 |
– |
# self.argsList = argsList |
333 |
– |
|
319 |
|
def jobSplittingByBlocks(self, blockSites): |
320 |
|
""" |
321 |
|
Perform job splitting. Jobs run over an integer number of files |
594 |
|
return |
595 |
|
|
596 |
|
|
597 |
< |
def jobSplittingForScript(self):#CarlosDaniele |
597 |
> |
def jobSplittingForScript(self): |
598 |
|
""" |
599 |
|
Perform job splitting based on number of job |
600 |
|
""" |
610 |
|
# argument is seed number.$i |
611 |
|
self.list_of_args = [] |
612 |
|
for i in range(self.total_number_of_jobs): |
628 |
– |
## Since there is no input, any site is good |
629 |
– |
# self.jobDestination.append(["Any"]) |
613 |
|
self.jobDestination.append([""]) |
631 |
– |
## no random seed |
614 |
|
self.list_of_args.append([str(i)]) |
615 |
|
return |
616 |
|
|
617 |
|
def split(self, jobParams): |
618 |
|
|
637 |
– |
#### Fabio |
619 |
|
njobs = self.total_number_of_jobs |
620 |
|
arglist = self.list_of_args |
621 |
|
# create the empty structure |
632 |
|
argu='' |
633 |
|
if len(jobParams[job]): |
634 |
|
argu += concString.join(jobParams[job] ) |
635 |
< |
job_ToSave['arguments']= str(job+1)+' '+argu## new BL--DS |
636 |
< |
job_ToSave['dlsDestination']= self.jobDestination[job]## new BL--DS |
656 |
< |
#common._db.updateJob_(job,job_ToSave)## new BL--DS |
635 |
> |
job_ToSave['arguments']= str(job+1)+' '+argu |
636 |
> |
job_ToSave['dlsDestination']= self.jobDestination[job] |
637 |
|
listField.append(job_ToSave) |
638 |
|
msg="Job "+str(job)+" Arguments: "+str(job+1)+" "+argu+"\n" \ |
639 |
|
+" Destination: "+str(self.jobDestination[job]) |
640 |
|
common.logger.debug(5,msg) |
641 |
< |
#common.logger.debug(5,"Job "+str(job)+" Destination: "+str(self.jobDestination[job])) |
662 |
< |
common._db.updateJob_(listID,listField)## new BL--DS |
663 |
< |
## Pay Attention Here....DS--BL |
641 |
> |
common._db.updateJob_(listID,listField) |
642 |
|
self.argsList = (len(jobParams[0])+1) |
643 |
|
|
644 |
|
return |
645 |
|
|
646 |
|
def numberOfJobs(self):
    """
    Return the number of jobs produced by the job-splitting step.

    The value is whatever the splitting code stored in
    ``self.total_number_of_jobs``.
    """
    return self.total_number_of_jobs
648 |
|
|
649 |
|
def getTarBall(self, exe): |
650 |
|
""" |
651 |
|
Return the TarBall with lib and exe |
652 |
|
""" |
676 |
– |
|
677 |
– |
# if it exist, just return it |
678 |
– |
# |
679 |
– |
# Marco. Let's start to use relative path for Boss XML files |
680 |
– |
# |
653 |
|
self.tgzNameWithPath = common.work_space.pathForTgz()+'share/'+self.tgz_name |
654 |
|
if os.path.exists(self.tgzNameWithPath): |
655 |
|
return self.tgzNameWithPath |
663 |
|
|
664 |
|
# First of all declare the user Scram area |
665 |
|
swArea = self.scram.getSWArea_() |
694 |
– |
#print "swArea = ", swArea |
695 |
– |
# swVersion = self.scram.getSWVersion() |
696 |
– |
# print "swVersion = ", swVersion |
666 |
|
swReleaseTop = self.scram.getReleaseTop_() |
698 |
– |
#print "swReleaseTop = ", swReleaseTop |
667 |
|
|
668 |
|
## check if working area is release top |
669 |
|
if swReleaseTop == '' or swArea == swReleaseTop: |
765 |
|
Returns part of a job script which prepares |
766 |
|
the execution environment for the job 'nj'. |
767 |
|
""" |
768 |
+ |
if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3): |
769 |
+ |
psetName = 'pset.py' |
770 |
+ |
else: |
771 |
+ |
psetName = 'pset.cfg' |
772 |
|
# Prepare JobType-independent part |
773 |
|
txt = '\n#Written by cms_cmssw::wsSetupEnvironment\n' |
774 |
|
txt += 'echo ">>> setup environment"\n' |
787 |
|
txt += ' cd $WORKING_DIR\n' |
788 |
|
txt += ' echo ">>> current directory (WORKING_DIR): $WORKING_DIR"\n' |
789 |
|
txt += self.wsSetupCMSOSGEnvironment_() |
818 |
– |
#txt += ' echo "### Set SCRAM ARCH to ' + self.executable_arch + ' ###"\n' |
819 |
– |
#txt += ' export SCRAM_ARCH='+self.executable_arch+'\n' |
790 |
|
txt += 'fi\n' |
791 |
|
|
792 |
|
# Prepare JobType-specific part |
825 |
|
|
826 |
|
# Prepare job-specific part |
827 |
|
job = common.job_list[nj] |
858 |
– |
### FEDE FOR DBS OUTPUT PUBLICATION |
828 |
|
if (self.datasetPath): |
829 |
|
txt += '\n' |
830 |
|
txt += 'DatasetPath='+self.datasetPath+'\n' |
860 |
|
txt += 'FirstRun=${args[1]}; export FirstRun\n' |
861 |
|
txt += 'echo "FirstRun: <$FirstRun>"\n' |
862 |
|
|
863 |
< |
txt += 'mv -f '+pset+' pset.cfg\n' |
863 |
> |
txt += 'mv -f ' + pset + ' ' + psetName + '\n' |
864 |
|
|
865 |
|
|
866 |
|
if self.pset != None: |
867 |
+ |
# FUTURE: Can simplify for 2_1_x and higher |
868 |
|
txt += '\n' |
869 |
< |
txt += 'echo "***** cat pset.cfg *********"\n' |
870 |
< |
txt += 'cat pset.cfg\n' |
871 |
< |
txt += 'echo "****** end pset.cfg ********"\n' |
872 |
< |
txt += '\n' |
873 |
< |
txt += 'PSETHASH=`EdmConfigHash < pset.cfg` \n' |
869 |
> |
if self.debug_pset==True: |
870 |
> |
txt += 'echo "***** cat ' + psetName + ' *********"\n' |
871 |
> |
txt += 'cat ' + psetName + '\n' |
872 |
> |
txt += 'echo "****** end ' + psetName + ' ********"\n' |
873 |
> |
txt += '\n' |
874 |
> |
txt += 'PSETHASH=`edmConfigHash < ' + psetName + '` \n' |
875 |
|
txt += 'echo "PSETHASH = $PSETHASH" \n' |
876 |
|
txt += '\n' |
877 |
|
return txt |
922 |
|
txt += 'rm -r lib/ module/ \n' |
923 |
|
txt += 'mv $RUNTIME_AREA/lib/ . \n' |
924 |
|
txt += 'mv $RUNTIME_AREA/module/ . \n' |
925 |
< |
if self.dataExist == True: txt += 'mv $RUNTIME_AREA/src/ . \n' |
925 |
> |
if self.dataExist == True: |
926 |
> |
txt += 'rm -r src/ \n' |
927 |
> |
txt += 'mv $RUNTIME_AREA/src/ . \n' |
928 |
|
if len(self.additional_inbox_files)>0: |
929 |
|
for file in self.additional_inbox_files: |
930 |
< |
txt += 'mv $RUNTIME_AREA/'+file+' . \n' |
930 |
> |
txt += 'mv $RUNTIME_AREA/'+os.path.basename(file)+' . \n' |
931 |
|
txt += 'mv $RUNTIME_AREA/ProdCommon/ . \n' |
932 |
|
|
933 |
|
txt += 'if [ -z "$PYTHONPATH" ]; then\n' |
947 |
|
""" |
948 |
|
|
949 |
|
def executableName(self):
    """
    Return the command used to start the job payload.

    When a user script (``self.scriptExe``) is configured the payload is
    run through the shell, so ``"sh "`` (with its trailing space) is
    returned; otherwise the configured executable name is returned.
    """
    if self.scriptExe:
        return "sh "
    else:
        return self.executable
960 |
|
ex_args = "" |
961 |
|
# FUTURE: This tests the CMSSW version. Can remove code as versions deprecated |
962 |
|
# Framework job report |
963 |
< |
if (self.major_version >= 1 and self.minor_version >= 5) or (self.major_version >= 2): |
963 |
> |
if (self.CMSSW_major >= 1 and self.CMSSW_minor >= 5) or (self.CMSSW_major >= 2): |
964 |
|
ex_args += " -j $RUNTIME_AREA/crab_fjr_$NJob.xml" |
965 |
< |
# Type of cfg file |
966 |
< |
if self.major_version >= 2 : |
965 |
> |
# Type of config file |
966 |
> |
if self.CMSSW_major >= 2 : |
967 |
|
ex_args += " -p pset.py" |
968 |
|
else: |
969 |
|
ex_args += " -p pset.cfg" |
974 |
|
Returns a list of filenames to be put in JDL input sandbox. |
975 |
|
""" |
976 |
|
inp_box = [] |
1004 |
– |
# # dict added to delete duplicate from input sandbox file list |
1005 |
– |
# seen = {} |
1006 |
– |
## code |
977 |
|
if os.path.isfile(self.tgzNameWithPath): |
978 |
|
inp_box.append(self.tgzNameWithPath) |
979 |
|
wrapper = os.path.basename(str(common._db.queryTask('scriptName'))) |
1070 |
|
req='Member("VO-cms-' + \ |
1071 |
|
self.version + \ |
1072 |
|
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
1073 |
< |
## SL add requirement for OS version only if SL4 |
1104 |
< |
#reSL4 = re.compile( r'slc4' ) |
1105 |
< |
if self.executable_arch: # and reSL4.search(self.executable_arch): |
1073 |
> |
if self.executable_arch: |
1074 |
|
req+=' && Member("VO-cms-' + \ |
1075 |
|
self.executable_arch + \ |
1076 |
|
'", other.GlueHostApplicationSoftwareRunTimeEnvironment)' |
1084 |
|
def configFilename(self):
    """
    Return the config filename.

    The name is ``self.name()`` plus ``.py`` for CMSSW 2_1 and later
    (major >= 3, or major >= 2 with minor >= 1) and ``.cfg`` otherwise.
    """
    # FUTURE: Can remove cfg mode for CMSSW >= 2_1_x
    if (self.CMSSW_major >= 2 and self.CMSSW_minor >= 1) or (self.CMSSW_major >= 3):
        return self.name()+'.py'
    else:
        return self.name()+'.cfg'
1113 |
|
|
1114 |
|
return txt |
1115 |
|
|
1148 |
– |
### OLI_DANIELE |
1116 |
|
def wsSetupCMSLCGEnvironment_(self): |
1117 |
|
""" |
1118 |
|
Returns part of a job script which prepares |
1147 |
|
txt += ' echo "==> setup cms environment ok"\n' |
1148 |
|
return txt |
1149 |
|
|
1183 |
– |
### FEDE FOR DBS OUTPUT PUBLICATION |
1150 |
|
def modifyReport(self, nj): |
1151 |
|
""" |
1152 |
|
insert the part of the script that modifies the FrameworkJob Report |
1153 |
|
""" |
1188 |
– |
|
1154 |
|
txt = '\n#Written by cms_cmssw::modifyReport\n' |
1155 |
|
publish_data = int(self.cfg_params.get('USER.publish_data',0)) |
1156 |
|
if (publish_data == 1): |
1186 |
|
txt += 'fi\n' |
1187 |
|
return txt |
1188 |
|
|
1189 |
+ |
def wsParseFJR(self):
    """
    Return the part of the job wrapper script that parses the
    FrameworkJobReport to obtain useful infos.

    The generated shell code:
      * runs parseCrabFjr.py on $RUNTIME_AREA/crab_fjr_$NJob.xml (when both
        exist and are non-empty) and takes the executable exit status from
        its output;
      * when ``self.datasetPath`` is set, compares the list of input files
        with the LFNs found in the report and sets exit status 30001 on a
        mismatch;
      * finally publishes the exit status as ExeExitCode / job_exit_code.

    Returns the script fragment as a string.
    """
    txt = '\n#Written by cms_cmssw::wsParseFJR\n'
    txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
    txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
    txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
    txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --MonitorID $MonitorID --MonitorJobID $MonitorJobID`\n'
    txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
    # '\\;' emits the same backslash-semicolon as before without relying on
    # an invalid Python escape sequence.
    txt += ' tmp_executable_exit_status=`echo $cmd_out | awk -F\\; \'{print $1}\' | awk -F \' \' \'{print $NF}\'`\n'
    # The variable must be quoted: an unquoted empty value turns the test
    # into `[ -n ]`, which is always true and would blank the exit status.
    txt += ' if [ -n "$tmp_executable_exit_status" ];then\n'
    txt += ' executable_exit_status=$tmp_executable_exit_status\n'
    txt += ' fi\n'
    txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
    txt += ' echo ">>> crab_fjr.xml contents: "\n'
    # Dump the same per-job report file that was parsed above ($NJob is a
    # shell variable, so it needs the leading '$').
    txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
    txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
    txt += ' else\n'
    txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
    txt += ' fi\n'
    txt += ' else\n'
    txt += ' echo "CRAB python script to parse CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += ' fi\n'
    #### Patch to check input data reading for CMSSW16x Hopefully we-ll remove it asap

    if self.datasetPath:
        # VERIFY PROCESSED DATA
        txt += ' if [ $executable_exit_status -eq 0 ];then\n'
        txt += ' echo ">>> Verify list of processed files:"\n'
        txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
        txt += ' grep LFN $RUNTIME_AREA/crab_fjr_$NJob.xml |cut -d">" -f2|cut -d"<" -f1|grep "/" > processed-files.txt\n'
        txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt input-files.txt\n'
        txt += ' echo "cat input-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat input-files.txt\n'
        txt += ' cat processed-files.txt | sort | uniq > tmp.txt\n'
        txt += ' mv tmp.txt processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' echo "cat processed-files.txt"\n'
        txt += ' echo "----------------------"\n'
        txt += ' cat processed-files.txt\n'
        txt += ' echo "----------------------"\n'
        txt += ' diff -q input-files.txt processed-files.txt\n'
        txt += ' fileverify_status=$?\n'
        txt += ' if [ $fileverify_status -ne 0 ]; then\n'
        txt += ' executable_exit_status=30001\n'
        txt += ' echo "ERROR ==> not all input files processed"\n'
        txt += ' echo " ==> list of processed files from crab_fjr.xml differs from list in pset.cfg"\n'
        txt += ' echo " ==> diff input-files.txt processed-files.txt"\n'
        txt += ' fi\n'
        txt += ' fi\n'
        txt += '\n'
    txt += 'else\n'
    txt += ' echo "CRAB FrameworkJobReport crab_fjr.xml is not available, using exit code of executable from command line."\n'
    txt += 'fi\n'
    txt += '\n'
    txt += 'echo "ExeExitCode=$executable_exit_status" | tee -a $RUNTIME_AREA/$repo\n'
    txt += 'echo "EXECUTABLE_EXIT_STATUS = $executable_exit_status"\n'
    txt += 'job_exit_code=$executable_exit_status\n'

    return txt
1252 |
+ |
|
1253 |
|
def setParam_(self, param, value):
    """Store ``value`` under the key ``param`` in ``self._params``."""
    self._params[param] = value
1255 |
|
|