  35        self.executable_arch = self.scram.getArch()
  36        self.tgz_name = 'default.tgz'
  37        self.scriptName = 'CMSSW.sh'
  38 <      self.pset = ''
  38 >      self.pset = ''
  39        self.datasetPath = ''
  40
  41        # set FJR file name

  69        self.selectNoInput = 0
  70
  71        self.dataTiers = []
  72 <
  73 <      self.debug_pset = cfg_params.get('USER.debug_pset',False)
  74 <
  72 >      self.debugWrap = ''
  73 >      self.debug_wrapper = cfg_params.get('USER.debug_wrapper',False)
  74 >      if self.debug_wrapper: self.debugWrap='--debug'
  75        ## now the application
  76        self.executable = cfg_params.get('CMSSW.executable','cmsRun')
  77        log.debug(6, "CMSSW::CMSSW(): executable = "+self.executable)
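In the replaced constructor lines, the old USER.debug_pset switch is superseded by a single USER.debug_wrapper flag: it drives verbose output inside the job wrapper and, via self.debugWrap, is forwarded to parseCrabFjr.py as '--debug' in wsParseFJR further down. A minimal sketch of that mapping, assuming cfg_params behaves like a plain dict of crab.cfg values:

    # Sketch only: cfg_params stands in for the real CRAB config object.
    cfg_params = {'USER.debug_wrapper': True}

    debug_wrapper = cfg_params.get('USER.debug_wrapper', False)
    debugWrap = '--debug' if debug_wrapper else ''

    # debugWrap is later appended to the parseCrabFjr.py command line, so one
    # config flag controls both wrapper verbosity and FJR-parsing debug output.
    print(debugWrap)   # --debug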
 119        msg ="Error. script_exe not defined"
 120        raise CrabException(msg)
 121
 122 +      # use parent files...
 123 +      self.useParent = self.cfg_params.get('CMSSW.use_parent',False)
 124 +
 125        ## additional input files
 126        if cfg_params.has_key('USER.additional_input_files'):
 127        tmpAddFiles = string.split(cfg_params['USER.additional_input_files'],',')

 165        if cfg_params.has_key('CMSSW.total_number_of_events'):
 166        self.total_number_of_events = int(cfg_params['CMSSW.total_number_of_events'])
 167        self.selectTotalNumberEvents = 1
 168 +      if self.selectNumberOfJobs == 1:
 169 +      if (self.total_number_of_events != -1) and int(self.total_number_of_events) < int(self.theNumberOfJobs):
 170 +      msg = 'Must specify at least one event per job. total_number_of_events > number_of_jobs '
 171 +      raise CrabException(msg)
 172        else:
 173        self.total_number_of_events = 0
 174        self.selectTotalNumberEvents = 0
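The added lines 168-171 reject configurations in which the user asks for more jobs than there are events to process: when total_number_of_events is not -1 (i.e. not "all events") and is smaller than number_of_jobs, at least one job would get zero events. A standalone sketch of the same guard (hypothetical helper, mirroring the variables above):

    # Sketch only: mirrors the added sanity check as a plain function.
    def check_event_split(total_number_of_events, theNumberOfJobs):
        """Refuse splittings that would leave some jobs with zero events."""
        if total_number_of_events != -1 and int(total_number_of_events) < int(theNumberOfJobs):
            raise ValueError('Must specify at least one event per job: '
                             'total_number_of_events must be >= number_of_jobs')

    check_event_split(1000, 10)    # fine: 100 events per job
    # check_event_split(5, 10)     # raises: fewer events than jobs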
 254        self.jobSplittingByBlocks(blockSites)
 255
 256        # modify Pset
 257 <      if self.pset != None:
 257 >      if self.pset != None:
 258        try:
 259        # Add FrameworkJobReport to parameter-set, set max events.
 260        # Reset later for data jobs by writeCFG which does all modifications

 293        self.filesbyblock=self.pubdata.getFiles()
 294        self.eventsbyblock=self.pubdata.getEventsPerBlock()
 295        self.eventsbyfile=self.pubdata.getEventsPerFile()
 296 +      self.parentFiles=self.pubdata.getParent()
 297
 298        ## get max number of events
 299 <      self.maxEvents=self.pubdata.getMaxEvents()
 299 >      self.maxEvents=self.pubdata.getMaxEvents()
 300
 301        ## Contact the DLS and build a list of sites hosting the fileblocks
 302        try:

 410
 411        # ---- Iterate over the files in the block until we've met the requested ---- #
 412        # ---- total # of events or we've gone over all the files in this block ---- #
 413 +      pString=''
 414        while ( (eventsRemaining > 0) and (fileCount < numFilesInBlock) and (jobCount < totalNumberOfJobs) ):
 415        file = files[fileCount]
 416 +      if self.useParent:
 417 +      parent = self.parentFiles[file]
 418 +      for f in parent :
 419 +      pString += '\\\"' + f + '\\\"\,'
 420 +      common.logger.debug(6, "File "+str(file)+" has the following parents: "+str(parent))
 421 +      common.logger.write("File "+str(file)+" has the following parents: "+str(parent))
 422        if newFile :
 423        try:
 424        numEventsInFile = self.eventsbyfile[file]
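The new lines accumulate the parent LFNs of every input file into pString, using the same escaped-quoting scheme already used for parString: each LFN becomes \"lfn\" followed by \, so the list survives the extra level of quoting when it is passed on the job wrapper's command line, and the trailing two characters are stripped later with [:-2]. A minimal sketch of that quoting, with a plain dict standing in for self.parentFiles (the mapping returned by self.pubdata.getParent()):

    # Sketch only: parentFiles and the LFNs are invented.
    parentFiles = {
        '/store/data/file1.root': ['/store/raw/parentA.root', '/store/raw/parentB.root'],
    }

    pString = ''
    for f in parentFiles['/store/data/file1.root']:
        # escaped quote, LFN, escaped quote, escaped comma
        pString += '\\\"' + f + '\\\"\,'

    print(pString)        # \"/store/raw/parentA.root\"\,\"/store/raw/parentB.root\"\,
    print(pString[:-2])   # trailing '\,' stripped before the value is stored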
 439        # end job using last file, use remaining events in block
 440        # close job and touch new file
 441        fullString = parString[:-2]
 442 <      list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
 442 >      if self.useParent:
 443 >      fullParentString = pString[:-2]
 444 >      list_of_lists.append([fullString,fullParentString,str(-1),str(jobSkipEventCount)])
 445 >      else:
 446 >      list_of_lists.append([fullString,str(-1),str(jobSkipEventCount)])
 447        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(filesEventCount - jobSkipEventCount)+" events (last file in block).")
 448        self.jobDestination.append(blockSites[block])
 449        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))

 455        eventsRemaining = eventsRemaining - filesEventCount + jobSkipEventCount
 456        jobSkipEventCount = 0
 457        # reset file
 458 +      pString = ""
 459        parString = ""
 460        filesEventCount = 0
 461        newFile = 1

 468        elif ( filesEventCount - jobSkipEventCount == eventsPerJobRequested ) :
 469        # close job and touch new file
 470        fullString = parString[:-2]
 471 <      list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
 471 >      if self.useParent:
 472 >      fullParentString = pString[:-2]
 473 >      list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
 474 >      else:
 475 >      list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
 476        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
 477        self.jobDestination.append(blockSites[block])
 478        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))

 483        eventsRemaining = eventsRemaining - eventsPerJobRequested
 484        jobSkipEventCount = 0
 485        # reset file
 486 +      pString = ""
 487        parString = ""
 488        filesEventCount = 0
 489        newFile = 1

 493        else :
 494        # close job but don't touch new file
 495        fullString = parString[:-2]
 496 <      list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
 496 >      if self.useParent:
 497 >      fullParentString = pString[:-2]
 498 >      list_of_lists.append([fullString,fullParentString,str(eventsPerJobRequested),str(jobSkipEventCount)])
 499 >      else:
 500 >      list_of_lists.append([fullString,str(eventsPerJobRequested),str(jobSkipEventCount)])
 501        common.logger.debug(3,"Job "+str(jobCount+1)+" can run over "+str(eventsPerJobRequested)+" events.")
 502        self.jobDestination.append(blockSites[block])
 503        common.logger.debug(5,"Job "+str(jobCount+1)+" Destination: "+str(self.jobDestination[jobCount]))

 511        jobSkipEventCount = eventsPerJobRequested - (filesEventCount - jobSkipEventCount - self.eventsbyfile[file])
 512        # remove all but the last file
 513        filesEventCount = self.eventsbyfile[file]
 514 +      if self.useParent:
 515 +      for f in parent : pString += '\\\"' + f + '\\\"\,'
 516        parString = '\\\"' + file + '\\\"\,'
 517        pass # END if
 518        pass # END while (iterate over files in the block)
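In each of the three job-closing branches the entry appended to list_of_lists grows by one field when use_parent is on: the parent-file string is inserted between the input-file string and the max-events/skip-events pair, which is also why the wrapper's positional arguments shift in the hunk at lines 873-887 below. A compact sketch of that branching (hypothetical helper, values invented):

    # Sketch only: shows the shape of the per-job argument list built above.
    def close_job(parString, pString, useParent, maxEvents, skipEvents):
        fullString = parString[:-2]          # drop the trailing '\,'
        if useParent:
            fullParentString = pString[:-2]  # parent LFNs travel as one extra field
            return [fullString, fullParentString, str(maxEvents), str(skipEvents)]
        return [fullString, str(maxEvents), str(skipEvents)]

    # With parents: [input files, parent files, max events, skip events]
    # Without:      [input files, max events, skip events]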
 753
 754        ## Add ProdCommon dir to tar
 755        prodcommonDir = 'ProdCommon'
 756 <      prodcommonPath = os.environ['CRABDIR'] + '/' + 'ProdCommon'
 756 >      prodcommonPath = os.environ['CRABDIR'] + '/' + 'external/'
 757        if os.path.isdir(prodcommonPath):
 758        tar.add(prodcommonPath,prodcommonDir)
 759        common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))

 766        common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
 767
 768        ##### Utils
 769 <      Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'JobReportErrorCode.py']
 769 >      Utils_file_list=['parseCrabFjr.py','writeCfg.py', 'fillCrabFjr.py']
 770        for file in Utils_file_list:
 771        tar.add(path+file,file)
 772        common.logger.debug(5,"Files added to "+self.tgzNameWithPath+" : "+str(tar.getnames()))
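Both tarball hunks rely on tarfile's two-argument add(name, arcname): the first argument is the path on disk, the second the name recorded inside the archive, so even though ProdCommon is now picked up from $CRABDIR/external/, jobs still unpack it as ProdCommon/. A minimal sketch with invented paths:

    # Sketch only: paths are invented; shows the source-path / archive-name split.
    import os
    import tarfile

    crabdir = os.environ.get('CRABDIR', '/tmp/crab')     # assumes CRABDIR is set
    tar = tarfile.open('/tmp/default.tgz', 'w:gz')

    prodcommon_path = os.path.join(crabdir, 'external/')
    if os.path.isdir(prodcommon_path):
        # second argument is the name recorded inside the archive
        tar.add(prodcommon_path, 'ProdCommon')
    tar.close()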
 873        txt += 'cp $RUNTIME_AREA/'+pset+' .\n'
 874        if (self.datasetPath): # standard job
 875        txt += 'InputFiles=${args[1]}; export InputFiles\n'
 876 <      txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
 877 <      txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
 876 >      if (self.useParent):
 877 >      txt += 'ParentFiles=${args[2]}; export ParentFiles\n'
 878 >      txt += 'MaxEvents=${args[3]}; export MaxEvents\n'
 879 >      txt += 'SkipEvents=${args[4]}; export SkipEvents\n'
 880 >      else:
 881 >      txt += 'MaxEvents=${args[2]}; export MaxEvents\n'
 882 >      txt += 'SkipEvents=${args[3]}; export SkipEvents\n'
 883        txt += 'echo "Inputfiles:<$InputFiles>"\n'
 884 +      if (self.useParent): txt += 'echo "ParentFiles:<$ParentFiles>"\n'
 885        txt += 'echo "MaxEvents:<$MaxEvents>"\n'
 886        txt += 'echo "SkipEvents:<$SkipEvents>"\n'
 887        else: # pythia like job
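Because the splitting code inserts the parent-file string as the second field of each list_of_lists entry, the wrapper's positional arguments shift by one when use_parent is on: args[1] is always the input-file list, then either ParentFiles/MaxEvents/SkipEvents in slots 2-4 or MaxEvents/SkipEvents in slots 2-3. A small sketch of the two layouts the generated script expects (illustrative only):

    # Sketch only: prints the two argument layouts emitted above.
    def arg_layout(use_parent):
        lines = ['InputFiles=${args[1]}; export InputFiles']
        if use_parent:
            lines += ['ParentFiles=${args[2]}; export ParentFiles',
                      'MaxEvents=${args[3]}; export MaxEvents',
                      'SkipEvents=${args[4]}; export SkipEvents']
        else:
            lines += ['MaxEvents=${args[2]}; export MaxEvents',
                      'SkipEvents=${args[3]}; export SkipEvents']
        return '\n'.join(lines)

    print(arg_layout(True))    # parent-aware layout, four positional fields
    print(arg_layout(False))   # original layout, three positional fields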
 899        if self.pset != None:
 900        # FUTURE: Can simplify for 2_1_x and higher
 901        txt += '\n'
 902 <      if self.debug_pset==True:
 902 >      if self.debug_wrapper==True:
 903        txt += 'echo "***** cat ' + psetName + ' *********"\n'
 904        txt += 'cat ' + psetName + '\n'
 905        txt += 'echo "****** end ' + psetName + ' ********"\n'

 920        if os.path.isfile(self.tgzNameWithPath):
 921        txt += 'echo ">>> tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+' :" \n'
 922        txt += 'tar xzvf $RUNTIME_AREA/'+os.path.basename(self.tgzNameWithPath)+'\n'
 923 <      txt += 'ls -Al \n'
 923 >      if self.debug_wrapper:
 924 >      txt += 'ls -Al \n'
 925        txt += 'untar_status=$? \n'
 926        txt += 'if [ $untar_status -ne 0 ]; then \n'
 927        txt += ' echo "ERROR ==> Untarring .tgz file failed"\n'
 981        """
 982
 983        def executableName(self):
 984 <      if self.scriptExe:
 984 >      if self.scriptExe:
 985        return "sh "
 986        else:
 987        return self.executable

1040        txt = '\n#Written by cms_cmssw::wsRenameOutput\n'
1041        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1042        txt += 'echo ">>> current directory content:"\n'
1043 <      txt += 'ls \n'
1043 >      if self.debug_wrapper:
1044 >      txt += 'ls -Al\n'
1045        txt += '\n'
1046
1047        for fileWithSuffix in (self.output_file):

1051        txt += 'if [ -e ./'+fileWithSuffix+' ] ; then\n'
1052        if (self.copy_data == 1): # For OSG nodes, file is in $WORKING_DIR, should not be moved to $RUNTIME_AREA
1053        txt += ' mv '+fileWithSuffix+' '+output_file_num+'\n'
1054 <      txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1054 >      #txt += ' ln -s `pwd`/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
1055        else:
1056        txt += ' mv '+fileWithSuffix+' $RUNTIME_AREA/'+output_file_num+'\n'
1057        txt += ' ln -s $RUNTIME_AREA/'+output_file_num+' $RUNTIME_AREA/'+fileWithSuffix+'\n'
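In the copy_data branch the symlink back into $RUNTIME_AREA is commented out: stage-out reads the renamed file directly from the working directory, so the link was redundant. The other branch keeps the pattern of renaming the output to its per-job name (output_file_num, presumably the original name with the job number folded in) and leaving a symlink under the original name. A standalone sketch of that rename-plus-symlink idea (names and numbering scheme are assumptions, not CRAB's exact helper):

    # Sketch only: invented naming; mirrors the mv + ln -s pair emitted above.
    import os

    def rename_with_job_number(file_with_suffix, njob, runtime_area):
        base, ext = os.path.splitext(file_with_suffix)
        output_file_num = '%s_%s%s' % (base, njob, ext)   # e.g. out.root -> out_3.root
        src = os.path.join(runtime_area, file_with_suffix)
        dst = os.path.join(runtime_area, output_file_num)
        os.rename(src, dst)          # mv fileWithSuffix output_file_num
        os.symlink(dst, src)         # ln -s ... $RUNTIME_AREA/fileWithSuffix
        return dst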
1072        txt += '\n'
1073        txt += 'echo ">>> current directory (SOFTWARE_DIR): $SOFTWARE_DIR" \n'
1074        txt += 'echo ">>> current directory content:"\n'
1075 <      txt += 'ls \n'
1075 >      if self.debug_wrapper:
1076 >      txt += 'ls -Al\n'
1077        txt += '\n'
1078        txt += 'cd $RUNTIME_AREA\n'
1079        txt += 'echo ">>> current directory (RUNTIME_AREA): $RUNTIME_AREA"\n'

1106        req='Member("VO-cms-' + \
1107        self.version + \
1108        '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
1109 <      if self.executable_arch:
1109 >      if self.executable_arch:
1110        req+=' && Member("VO-cms-' + \
1111        self.executable_arch + \
1112        '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
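Lines 1106-1112 (context) assemble the JDL requirements string from the CMSSW version and, when set, the SCRAM architecture, both expressed as VO-cms-* runtime-environment tags. A quick sketch of the string that comes out, with example values:

    # Sketch only: version and architecture values are examples.
    version = 'CMSSW_2_1_8'
    executable_arch = 'slc4_ia32_gcc345'

    req = 'Member("VO-cms-' + version + \
          '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'
    if executable_arch:
        req += ' && Member("VO-cms-' + executable_arch + \
               '", other.GlueHostApplicationSoftwareRunTimeEnvironment)'

    print(req)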
1222        txt += 'fi\n'
1223        return txt
1224
1225 <      def wsParseFJR(self):
1225 >      def wsParseFJR(self):
1226        """
1227 <      Parse the FrameworkJobReport to obtain useful infos
1227 >      Parse the FrameworkJobReport to obtain useful infos
1228        """
1229        txt = '\n#Written by cms_cmssw::wsParseFJR\n'
1230        txt += 'echo ">>> Parse FrameworkJobReport crab_fjr.xml"\n'
1231        txt += 'if [ -s $RUNTIME_AREA/crab_fjr_$NJob.xml ]; then\n'
1232        txt += ' if [ -s $RUNTIME_AREA/parseCrabFjr.py ]; then\n'
1233 <      txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --MonitorID $MonitorID --MonitorJobID $MonitorJobID`\n'
1234 <      txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1235 <      txt += ' executable_exit_status=`echo $cmd_out | awk -F\; "{print $1}" | awk -F ' ' "{print $NF}"`\n'
1233 >      txt += ' cmd_out=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --dashboard $MonitorID,$MonitorJobID '+self.debugWrap+'`\n'
1234 >      if self.debug_wrapper :
1235 >      txt += ' echo "Result of parsing the FrameworkJobReport crab_fjr.xml: $cmd_out"\n'
1236 >      txt += ' executable_exit_status=`python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --exitcode`\n'
1237        txt += ' if [ $executable_exit_status -eq 50115 ];then\n'
1238        txt += ' echo ">>> crab_fjr.xml contents: "\n'
1239        txt += ' cat $RUNTIME_AREA/crab_fjr_$NJob.xml\n'
1240        txt += ' echo "Wrong FrameworkJobReport --> does not contain useful info. ExitStatus: $executable_exit_status"\n'
1241 +      txt += ' elif [ $executable_exit_status -eq -999 ];then\n'
1242 +      txt += ' echo "ExitStatus from FrameworkJobReport not available. Using exit code of executable from command line."\n'
1243        txt += ' else\n'
1244        txt += ' echo "Extracted ExitStatus from FrameworkJobReport parsing output: $executable_exit_status"\n'
1245        txt += ' fi\n'
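The rewritten block now calls parseCrabFjr.py twice: once with --dashboard $MonitorID,$MonitorJobID (plus the optional --debug switch held in self.debugWrap) for monitoring, and once with --exitcode to obtain the executable's exit status, instead of scraping it out of the combined output with awk. In the generated shell, 50115 marks an unusable FJR and -999 means no status was found, in which case the wrapper falls back to the executable's own exit code. The decision tree, restated as plain Python for clarity (sketch only):

    # Sketch only: restates the generated shell logic; not part of the patch.
    def resolve_exit_status(fjr_exit_status, cmdline_exit_status):
        if fjr_exit_status == 50115:
            # FJR exists but carries no useful information
            return 50115
        if fjr_exit_status == -999:
            # no status in the FJR: fall back to the executable's exit code
            return cmdline_exit_status
        # status successfully extracted from the FrameworkJobReport
        return fjr_exit_status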
1252        # VERIFY PROCESSED DATA
1253        txt += ' if [ $executable_exit_status -eq 0 ];then\n'
1254        txt += ' echo ">>> Verify list of processed files:"\n'
1255 <      txt += ' echo $InputFiles |tr -d "\\" |tr "," \n"|tr -d "\"" > input-files.txt\n'
1256 <      txt += ' grep LFN $RUNTIME_AREA/crab_fjr_$NJob.xml |cut -d">" -f2|cut -d"<" -f1|grep "/" > processed-files.txt\n'
1255 >      txt += ' echo $InputFiles |tr -d \'\\\\\' |tr \',\' \'\\n\'|tr -d \'"\' > input-files.txt\n'
1256 >      txt += ' python $RUNTIME_AREA/parseCrabFjr.py --input $RUNTIME_AREA/crab_fjr_$NJob.xml --lfn > processed-files.txt\n'
1257        txt += ' cat input-files.txt | sort | uniq > tmp.txt\n'
1258        txt += ' mv tmp.txt input-files.txt\n'
1259        txt += ' echo "cat input-files.txt"\n'
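The new input-files.txt line strips the backslashes, turns the comma separators into newlines and drops the quotes, so $InputFiles (the escaped list built during splitting) becomes one LFN per line; the processed-files list is now produced by parseCrabFjr.py --lfn instead of grepping the XML. A quick Python equivalent of the clean-up step, with an invented value for InputFiles:

    # Sketch only: mirrors  tr -d '\\' | tr ',' '\n' | tr -d '"'  on an invented value.
    input_files = '\\"/store/data/f1.root\\",\\"/store/data/f2.root\\"'

    cleaned = input_files.replace('\\', '').replace('"', '')
    for lfn in cleaned.split(','):
        print(lfn)
    # /store/data/f1.root
    # /store/data/f2.root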