CRAB/python/ModifyJobReport.py

#!/usr/bin/env python
"""
_ModifyJobReport.py

Example of how to use the FwkJobRep package to update a job report post processing


"""
import os, string
import sys
import popen2

from ProdCommon.FwkJobRep.ReportParser import readJobReport


def readCksum(filename):
    """
    _readCksum_

    Run the external cksum command on a file and return the checksum value

    Arguments:
        filename -- path of the file to checksum

    Returns:
        The checksum as a string (first field of the cksum output), or
        None when cksum cannot be started, exits with a non-zero status
        or prints nothing.

    """
    # Imported here so that this function does not depend on the
    # deprecated popen2 module.
    import subprocess

    try:
        # Passing an argument list (no shell) keeps file names that contain
        # spaces or shell metacharacters intact.
        proc = subprocess.Popen(["cksum", filename],
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        # the cksum executable could not be launched
        return None

    # communicate() drains both pipes and waits for the process to finish,
    # replacing the former busy-wait on poll().
    output = proc.communicate()[0]
    if proc.returncode:
        return None

    fields = output.split()
    if not fields:
        return None
    value = fields[0]
    # same text as the former statement: print "checksum = ", value
    print("checksum =  %s" % value)
    return value


def fileSize(filename):
    """
    _fileSize_

    Get size of file

    Arguments:
        filename -- path of the file to inspect

    Returns:
        The size in bytes reported by os.stat (st_size).

    Raises:
        OSError -- when os.stat fails; the error is not caught here.

    """
    # stat only once, so the value printed and the value returned
    # can never disagree
    size = os.stat(filename).st_size
    # same text as the former statement:
    #   print "(size) os.stat(filename)[6] = ", os.stat(filename)[6]
    print("(size) os.stat(filename)[6] =  %s" % size)
    return size
    

def addFileStats(file):
    """
    _addFileStats_

    Attach size and checksum information to a file entry.

    The size returned by fileSize() for file['PFN'] is stored under
    file['Size'], and the value returned by readCksum() for the same PFN
    is handed to file.addChecksum() with the algorithm name 'cksum'.
    No existence check is done here.

    """
    sizeInBytes = fileSize(file['PFN'])
    file['Size'] = sizeInBytes
    file.addChecksum('cksum', readCksum(file['PFN']))
    return None


def modifyFile(file):
    """
    _modifyFile_

    Insert the job number into the file name and update PFN and LFN.

    The current PFN is cut at the first '.root': the text before it is the
    prefix and the remainder (starting with '.root') is the suffix. When
    the PFN contains no '.root', the whole PFN is the prefix and the suffix
    is empty. The new names are

        PFN = diz['se_path'] + prefix + '_' + diz['n_job'] + suffix
        LFN = diz['for_lfn'] + prefix + '_' + diz['n_job'] + suffix

    Arguments:
        file -- file entry of the job report; 'PFN' and 'LFN' are read,
                and 'PFN', 'SEName' and 'LFN' are written through
                updatePFN() and updateLFN()

    Uses the module-level dictionary diz, which is filled from the command
    line by the __main__ section of this script.
    """
    pfn = str(file['PFN'])
    pref = pfn.split('.root')[0]
    # Take the suffix by slicing. Splitting the PFN on the prefix raised
    # ValueError for an empty prefix (PFN starting with '.root') and
    # returned a truncated suffix when the prefix text occurred again
    # later in the name.
    suff = pfn[len(pref):]

    ### FEDE changing se_path with  the endpoint
    jobTag = '_' + diz['n_job']
    newPfn = diz['se_path'] + pref + jobTag + suff
    print("newPfn =  %s" % newPfn)
    #########################

    newLfn = diz['for_lfn'] + pref + jobTag + suff
    print("newLfn =  %s" % newLfn)

    # The PFN must be updated first: updateLFN() changes file['LFN'],
    # which both helpers compare against.
    updatePFN(file, file['LFN'], newPfn)

    updateLFN(file, file['LFN'], newLfn)

    return


def updatePFN(file, lfn, newPFN):
    """
    _updatePFN_

    Record the PFN a file has after stage out to some SE.

    The entry is changed only when file['LFN'] equals lfn: file['PFN'] is
    then set to newPFN and file['SEName'] to diz['se_name']. Otherwise the
    entry is left untouched.
    """
    if file['LFN'] == lfn:
        file['PFN'] = newPFN
        file['SEName'] = diz['se_name']
    return None


def updateLFN(file, lfn, newLFN):
    """
    _updateLFN_

    Replace the LFN of a file entry.

    file['LFN'] is set to newLFN only when its current value equals lfn;
    otherwise nothing is changed.
    """
    if file['LFN'] == lfn:
        file['LFN'] = newLFN
    return None


if __name__ == '__main__':

    # Example:  Load the report, update the file stats, pretend to do a stage out
    # and update the information for the stage out

    # The command line is a flat sequence of "key value" pairs.
    L = sys.argv[1:]
    # An odd number of arguments would leave the last key without a value
    # (this used to end in an IndexError while filling the dictionary).
    if len(L) < 21 or len(L) % 2 != 0:
        print("Error: wrong number of arguments passed to the ModifyJobreport. Please check your script")
        sys.exit(1)

    # diz is read as a module-level global by modifyFile() and updatePFN().
    # As before, a key given twice keeps its last value.
    diz = dict(zip(L[0::2], L[1::2]))

    if 'fjr' not in diz:
        print("no crab fjr found")
        sys.exit(1)

    # report is an instance of FwkJobRep.FwkJobReport class
    # can be N in a file, so a list is always returned
    # by readJobReport, here I am assuming just one report per file for simplicity
    reports = readJobReport(diz['fjr'])
    try:
        report = reports[-1]
    except IndexError:
        print("Error: No file to publish in xml file")
        sys.exit(1)

    # ARGs parameters
    if 'n_job' not in diz:
        print("it is necessary to specify the job number")
        sys.exit(1)

    UserProcessedDataset = diz.get('UserProcessedDataset', '')
    print("UserProcessedDataset =  %s" % UserProcessedDataset)

    #### Adding AnalysisFile ####
    if len(report.files) == 0 and len(report.analysisFiles) == 0:
        print("no EDM_output file or NO_EDM_output to modify")
        print("Adding a no EDM_output file")
        # file_list is a comma separated list of output file paths
        for outputPath in str(diz['file_list']).split(','):
            # keep only the last path component
            file_name = outputPath.split('/')[-1]

            report.newAnalysisFile()
            # fill in whatever the analysis file entries do not define yet
            for aFile in report.analysisFiles:
                if aFile['SEName'] is None:
                    aFile['SEName'] = diz['se_name']
                if aFile['LFN'] is None:
                    aFile['LFN'] = diz['for_lfn'] + file_name
                if aFile['PFN'] is None:
                    aFile['PFN'] = diz['se_path'] + file_name
            report.save()

        report.write("NewFrameworkJobReport.xml")
    else:
        for f in report.files:
            if ':' in f['PFN']:
                # Drop the part before the first ':'. Splitting only once
                # keeps paths that contain further ':' characters whole.
                f['PFN'] = f['PFN'].split(':', 1)[1]
            if not os.path.exists(f['PFN']):
                print("Error: Cannot find file: %s " % f['PFN'])
                sys.exit(1)
            #Generate per file stats
            addFileStats(f)

            datasetinfo = f.newDataset()
            datasetinfo['PrimaryDataset'] = diz['PrimaryDataset']
            datasetinfo['DataTier'] = "USER"
            datasetinfo['ProcessedDataset'] = UserProcessedDataset
            datasetinfo['ApplicationFamily'] = diz['ApplicationFamily']
            datasetinfo['ApplicationName'] = diz['ApplicationName']
            datasetinfo['ApplicationVersion'] = diz['cmssw_version']
            datasetinfo['PSetHash'] = diz['psethash']
            datasetinfo['PSetContent'] = "TOBEADDED"
            ### to check if the job output is composed by more files
            modifyFile(f)

        for aFile in report.analysisFiles:
            # analysis files are renamed starting from their bare file name
            aFile['PFN'] = os.path.basename(aFile['FileName'])
            modifyFile(aFile)

        # After modifying the report, you can then save it to a file.
        report.write("NewFrameworkJobReport.xml")
    

Revision:	1.12
Committed:	Wed Mar 17 17:58:51 2010 UTC (15 years, 1 month ago) by spiga
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	HEAD
Changes since 1.11:	+0 -0 lines
State:	*FILE REMOVED*
Log Message:	remove unused file
#	Content
1	#!/usr/bin/env python
2	"""
3	_ModifyJobReport.py
4
5	Example of how to use the FwkJobRep package to update a job report post processing
6
7
8	"""
9	import os, string
10	import sys
11	import popen2
12
13	from ProdCommon.FwkJobRep.ReportParser import readJobReport
14
15
16	def readCksum(filename):
17	"""
18	_readCksum_
19
20	Run a cksum command on a file an return the checksum value
21
22	"""
23	pop = popen2.Popen4("cksum %s" % filename)
24	while pop.poll() == -1:
25	exitStatus = pop.poll()
26	exitStatus = pop.poll()
27	if exitStatus:
28	return None
29	content = pop.fromchild.read()
30	value = content.strip()
31	value = content.split()[0]
32	print "checksum = ", value
33	return value
34
35
36	def fileSize(filename):
37	"""
38	_fileSize_
39
40	Get size of file
41
42	"""
43	print "(size) os.stat(filename)[6] = ", os.stat(filename)[6]
44	return os.stat(filename)[6]
45
46
47	def addFileStats(file):
48	"""
49	_addFileStats_
50
51	Add checksum and size info to each size
52
53	"""
54	#if not os.path.exists(file['PFN']):
55	# print "Error: Cannot find file: %s " % file['PFN']
56	# return 1
57	file['Size'] = fileSize(file['PFN'])
58	checkSum = readCksum(file['PFN'])
59	file.addChecksum('cksum',checkSum)
60	return
61
62
63	def modifyFile(file):
64	"""
65	_modifyFile_
66
67	Calls functions to modify PFN and LFN
68	"""
69
70	str.split(str(file['PFN']), '.root')
71	pref = str.split(str(file['PFN']), '.root')[0]
72	suff = str.split(str(file['PFN']), pref)[1]
73
74	### FEDE changing se_path with the endpoint
75	#newPfn = diz['se_name'] + diz['se_path'] + pref + '_' + diz['n_job'] + suff
76	newPfn = diz['se_path'] + pref + '_' + diz['n_job'] + suff
77	print "newPfn = ", newPfn
78	#########################
79
80	newLfn = diz['for_lfn'] + pref + '_' + diz['n_job'] + suff
81	print "newLfn = ", newLfn
82
83	updatePFN(file, file['LFN'], newPfn)
84
85	updateLFN(file, file['LFN'], newLfn)
86
87	return
88
89
90	def updatePFN(file, lfn, newPFN):
91	"""
92	_updatePFN_
93
94	Update a PFN for an LFN, based on a stage out to some SE.
95	"""
96	if file['LFN'] != lfn:
97	return
98
99	file['PFN'] = newPFN
100	file['SEName'] = diz['se_name']
101	return
102
103
104	def updateLFN(file, lfn, newLFN):
105	"""
106	_updateLFN_
107
108	Update a LFN.
109	"""
110	if file['LFN'] != lfn:
111	return
112	file['LFN'] = newLFN
113	return
114
115
116	if __name__ == '__main__':
117
118	# Example: Load the report, update the file stats, pretend to do a stage out
119	# and update the information for the stage out
120
121
122	L = sys.argv[1:]
123	if len(L) < 21:
124	print "Error: wrong number of arguments passed to the ModifyJobreport. Please check your script"
125	sys.exit(1)
126	diz={}
127
128	i = 0
129	while i < len(L):
130	diz[L[i]] = L[i+1]
131	i = i + 2
132
133	if diz.has_key('fjr'):
134	inputReport = diz['fjr']
135	reports = readJobReport(inputReport)
136
137	# report is an instance of FwkJobRep.FwkJobReport class
138	# can be N in a file, so a list is always returned
139	# by for readJobReport, here I am assuming just one report per file for simplicity
140	try:
141	report = reports[-1]
142	except IndexError:
143	print "Error: No file to publish in xml file"
144	sys.exit(1)
145	else:
146	print "no crab fjr found"
147	sys.exit(1)
148
149
150	# ARGs parameters
151	if diz.has_key('n_job'):
152	n_job = diz['n_job']
153	else:
154	print "it is necessary to specify the job number"
155	sys.exit(1)
156
157	if diz.has_key('UserProcessedDataset'):
158	UserProcessedDataset = diz['UserProcessedDataset']
159	else:
160	UserProcessedDataset=''
161	print "UserProcessedDataset = ", UserProcessedDataset
162
163	#### Adding AnalysisFile ####
164	if (len(report.files) == 0) and (len(report.analysisFiles) == 0):
165	print "no EDM_output file or NO_EDM_output to modify"
166	print "Adding a no EDM_output file"
167	files=str.split(str(diz['file_list']), ',')
168	#print "files = ", files
169	for file in files:
170	split = str.split(str(file), '/')
171	if (len(split) > 0):
172	file_name = split[len(split)-1]
173	else:
174	file_name = file
175
176	report.newAnalysisFile()
177	for aFile in report.analysisFiles:
178	if (aFile['SEName'] == None):
179	aFile['SEName']=diz['se_name']
180	if (aFile['LFN'] == None):
181	aFile['LFN']=diz['for_lfn']+file_name
182	if (aFile['PFN'] == None):
183	aFile['PFN']=diz['se_path']+file_name
184	report.save()
185
186	report.write("NewFrameworkJobReport.xml")
187	else:
188	if (len(report.files) != 0):
189	for f in report.files:
190	if (string.find(f['PFN'], ':') != -1):
191	tmp_path = string.split(f['PFN'], ':')
192	f['PFN'] = tmp_path[1]
193	if not os.path.exists(f['PFN']):
194	print "Error: Cannot find file: %s " % f['PFN']
195	sys.exit(1)
196	#Generate per file stats
197	addFileStats(f)
198
199	datasetinfo=f.newDataset()
200	datasetinfo['PrimaryDataset'] = diz['PrimaryDataset']
201	datasetinfo['DataTier'] = "USER"
202	datasetinfo['ProcessedDataset'] = UserProcessedDataset
203	datasetinfo['ApplicationFamily'] = diz['ApplicationFamily']
204	datasetinfo['ApplicationName'] = diz['ApplicationName']
205	datasetinfo['ApplicationVersion'] = diz['cmssw_version']
206	datasetinfo['PSetHash'] = diz['psethash']
207	datasetinfo['PSetContent'] = "TOBEADDED"
208	### to check if the job output is composed by more files
209	modifyFile(f)
210
211	if (len(report.analysisFiles) != 0):
212	for aFile in report.analysisFiles:
213	aFile['PFN'] = os.path.basename(aFile['FileName'])
214	modifyFile(aFile)
215
216	# After modifying the report, you can then save it to a file.
217	report.write("NewFrameworkJobReport.xml")
218
219
220