import os
import re
import sys

###############################################################
#Usage:
# python onlyLast.py <path to input directory on CASTOR>
#
#now updated to handle output from the latest CRAB with
#file names like this:
# myfilename_1_1_aBC.root
#
# 02/09/11 - Don
# updated to handle files on the T3
# this script now automatically checks if the input dir is pointing
# to the T3 and adapts accordingly
# e.g. python onlyLast.py "$CUSE/blah"
#
#What it does:
# Files whose names share the first number are treated as one group.
# Within a group the file with the largest second number stays where
# it is; every other non-empty file of the group is copied into
# <input directory>/EXTRAS and then removed from the input directory.
###############################################################

USAGE = 'Usage: python onlyLast.py <path to input directory on CASTOR>'

#<stub>_<first number>_<second number>_<arbitrary extension>.root
FILENAME_PATTERN = re.compile(r'(.*)_([0-9]+)_([0-9]+)_(.*)\.root$')


def is_t3_path(inputdir):
    """Return True if the text before the first '/' of inputdir contains 'srm'."""
    return 'srm' in inputdir.split('/')[0]


def build_list_command(inputdir, isT3, tmpfile):
    """Return the shell pipeline that writes "<size> <name>" lines to tmpfile."""
    if isT3:
        return ('srmls "%s" | awk -F "/" \'{print $1,$NF}\' | awk \'NF>0\' > %s'
                % (inputdir, tmpfile))
    return 'nsls -l %s | awk \'// {print $5,$9;}\' > %s' % (inputdir, tmpfile)


def build_copy_command(source, outputdir, filename, isT3):
    """Return the shell command that copies source into outputdir."""
    if isT3:
        #lcg-cp needs the full destination file name, rfcp only the directory
        return ('lcg-cp --verbose -b -D srmv2 "%s" "%s/%s"'
                % (source, outputdir, filename))
    return 'rfcp %s %s' % (source, outputdir)


def build_remove_command(source, isT3):
    """Return the shell command that deletes source."""
    if isT3:
        #quoted like the srmls and lcg-cp arguments so that the shell never
        #tries to interpret characters of the srm URL
        return 'srmrm "%s"' % source
    return 'rfrm ' + source


def parse_listing(lines):
    """Group the usable entries of a "<size> <name>" listing.

    lines   -- iterable of text lines, each holding a size and a file name
               separated by whitespace.

    Returns (groups, skipped):
    groups  -- dict mapping the first number of the file name (as a string)
               to a list of (second number as int, file name, second number
               as string) tuples.
    skipped -- stripped lines that carry a name but could not be used
               (non-numeric size, or a non-empty file whose name does not
               match FILENAME_PATTERN).

    Lines with fewer than two fields and files of size zero are ignored
    silently.  A warning is printed for every file whose stub differs from
    the stub of the first usable file.
    """
    groups = {}
    skipped = []
    stub = None
    for line in lines:
        fields = line.split()
        if len(fields) < 2:
            continue
        try:
            size = int(fields[0])
        except ValueError:
            skipped.append(line.strip())
            continue
        if size <= 0:
            continue
        name = fields[1]
        match = FILENAME_PATTERN.match(name)
        if match is None:
            skipped.append(line.strip())
            continue
        if stub is None:
            stub = match.group(1)
        elif stub != match.group(1):
            print("WARNING -- there is more than one group of filenames here!")
        entry = (int(match.group(3)), name, match.group(3))
        groups.setdefault(match.group(2), []).append(entry)
    return groups, skipped


def split_latest(entries):
    """Split the entries of one group into (entry to keep, entries to move).

    The index and the file name travel together in one tuple and the index
    is compared as an integer, so 10 ranks above 9 and the kept name always
    belongs to the kept index.
    """
    ordered = sorted(entries)
    return ordered[-1], ordered[:-1]


def main(argv):
    """Run the clean-up for the directory given in argv[1].

    Returns 0 on success and 1 on a usage error or if any copy failed.
    """
    if len(argv) < 2 or not argv[1]:
        print(USAGE)
        return 1

    inputdir = argv[1]
    if not inputdir.endswith('/'):
        inputdir += '/'

    #check if the input directory is located on the T3
    isT3 = is_t3_path(inputdir)
    if isT3:
        print("T3 directory specified. Using grid commands.")

    outputdir = inputdir + 'EXTRAS'
    mkdircommand = 'nsmkdir ' + outputdir

    #get a file listing into tmpfile
    tmpfile = '.onlyLast_py_' + str(os.getpid())
    try:
        os.system(build_list_command(inputdir, isT3, tmpfile))
        with open(tmpfile, 'r') as listing:
            groups, skipped = parse_listing(listing)
    finally:
        #never leave the temporary listing behind, even on an error
        if os.path.exists(tmpfile):
            os.remove(tmpfile)

    for line in skipped:
        print('WARNING -- skipping unrecognised listing entry: %s' % line)

    #keep track of whether we made the directory or not
    #do not manually make the directory for the T3, lcg-cp will take care of it
    alreadymadedir = isT3

    #do some accounting
    nmoved = 0
    nfailed = 0

    for ii in sorted(groups, key=lambda key: (int(key), key)):
        entries = groups[ii]
        if len(entries) == 1:
            print('nothing to do for  %s' % ii)
            continue

        kept, extras = split_latest(entries)
        print('%s : keeping index  %s' % (ii, kept[2]))
        print('%s : corresponds to file  %s' % (ii, kept[1]))

        for entry in extras:
            jj = entry[1]
            if not alreadymadedir:
                print(mkdircommand)
                #this will give a harmless error if the dir already exists
                os.system(mkdircommand)
                alreadymadedir = True

            s = inputdir + jj
            print('moving  %s' % s)

            cpcmd = build_copy_command(s, outputdir, jj, isT3)
            print(cpcmd)
            status = os.system(cpcmd)
            if status != 0:
                #removing the original after a failed copy would lose the file
                print('WARNING -- copy failed (status %s), leaving %s in place'
                      % (status, s))
                nfailed += 1
                continue

            rmcmd = build_remove_command(s, isT3)
            print(rmcmd)
            os.system(rmcmd)
            nmoved += 1

    print("----------------------------")
    print('I moved this many files:  %s' % nmoved)
    if nfailed:
        print('Copies that failed (files left in place):  %s' % nfailed)
    print("----------------------------")

    if nfailed:
        return 1
    return 0


if __name__ == '__main__':
    sys.exit(main(sys.argv))