1 |
|
import os, sys, re |
2 |
|
|
3 |
+ |
############################################################### |
4 |
+ |
|
5 |
+ |
#Usage: |
6 |
+ |
# python onlyLast.py <path to input directory on CASTOR> |
7 |
+ |
|
8 |
+ |
#now updated to handle output from the latest CRAB with |
9 |
+ |
#file names like this: |
10 |
+ |
# myfilename_1_1_aBC.root |
11 |
+ |
|
12 |
+ |
# 02/09/11 - Don |
13 |
+ |
# updated to handle files on the T3 |
14 |
+ |
# this script now automatically checks if the input dir is pointing |
15 |
+ |
# to the T3 and adapts accordingly |
16 |
+ |
# e.g. python onlyLast.py "$CUSE/blah" |
17 |
+ |
|
18 |
+ |
############################################################### |
19 |
+ |
|
20 |
|
#handle input arguments |
21 |
|
#this includes the name of the script |
22 |
|
##for arg in sys.argv: |
23 |
|
## print arg |
24 |
|
|
25 |
|
#sys.argv[1] will have the first argument |
26 |
< |
# Scratch file (created in the current directory) that receives the
# directory listing. The PID suffix keeps concurrent runs apart.
mypid = os.getpid()
tmpfile = '.onlyLast_py_' + str(mypid)

# The one required argument is the directory to clean up. Fail with a
# usage message instead of an IndexError when it is missing or empty.
if len(sys.argv) < 2 or not sys.argv[1]:
    sys.exit("Usage: python onlyLast.py <path to input directory>")
inputdir = sys.argv[1]
if not inputdir.endswith('/'):
    inputdir += '/'

#check if the input directory is located on the T3
# Only the text before the first "/" is inspected (e.g. "srm:" for an
# srm:// URL). Environment variables are expanded first because the
# previous implementation ran the string through the shell's `echo`.
location = os.path.expandvars(inputdir).split('/', 1)[0]
isT3 = "srm" in location
if isT3:
    print("T3 directory specified. Using grid commands.")

# Superseded duplicates are moved into this sub-directory of the input dir.
outputdir = inputdir + 'EXTRAS'
mkdircommand = "nsmkdir " + outputdir

#get a file listing into tmpfile
# Each listing line is reduced to "<size> <filename>" by awk.
if isT3:
    cmd = 'srmls "' + inputdir + '" | awk -F "/" \'{print $1,$NF}\' | awk \'NF>0\' > '
else:
    cmd = 'nsls -l ' + inputdir + ' | awk \'// {print $5,$9;}\' > '
cmd += tmpfile
#print cmd
os.system(cmd)

#keep track of whether we made the directory or not
#do not manually make the directory for the T3, lcg-cp will take care of it
alreadymadedir = 1 if isT3 else 0

# Left open on purpose: the end of the script closes f and removes tmpfile.
f = open(tmpfile, 'r')

# CRAB output names look like <stub>_<job>_<retry>_<random>.root.
# indexdict maps job number -> list of retry indices seen for that job;
# completefilenames maps job number -> the matching complete file names.
# Both lists are appended to together, so position k in one corresponds to
# position k in the other.
crabname = re.compile(r'(.*)_([0-9]+)_([0-9]+)_(.*)\.root$')
indexdict = {}
completefilenames = {}

# 'stub' is a placeholder meaning "no file name prefix seen yet".
stub = 'stub'

#do some accounting
nmoved = 0

for line in f:
    mypair = line.split()
    # Skip blank lines and entries without both a size and a name (for
    # example a directory entry, whose last "/"-separated field is empty).
    if len(mypair) < 2:
        continue
    try:
        size = int(mypair[0])
    except ValueError:
        # Not a "<size> <name>" line; nothing to record.
        continue
    # Zero-length files are never recorded, so they can neither be kept as
    # the "good" copy nor moved.
    if size <= 0:
        continue

    result = crabname.match(mypair[1])
    if result is None:
        print("skipping file that does not look like CRAB output: %s" % mypair[1])
        continue

    if stub == 'stub':
        stub = result.group(1)
    elif stub != result.group(1):
        print("WARNING -- there is more than one group of filenames here!")

    jobnumber = result.group(2)
    indexdict.setdefault(jobnumber, []).append(result.group(3))
    completefilenames.setdefault(jobnumber, []).append(mypair[1])
128 |
|
print "nothing to do for ",ii |
129 |
|
else: |
130 |
|
# Pick the copy to keep for job ii and move every other copy to outputdir.
#
# The retry indices and the complete file names were appended together, so
# they are paired up and sorted as pairs. The sort key is the *numeric*
# retry index: sorting the strings would place "10" before "9" and keep the
# wrong file once a job has been resubmitted ten or more times. An index
# that is not purely digits sorts first, so it is never preferred.
pairs = sorted(
    zip(indexdict[ii], completefilenames[ii]),
    key=lambda pair: (int(pair[0]) if pair[0].isdigit() else -1, pair[1]),
)
goodindex, goodfilename = pairs.pop()
# As before, the two lists end up holding only the superseded entries.
indexdict[ii][:] = [pair[0] for pair in pairs]
completefilenames[ii][:] = [pair[1] for pair in pairs]
print("%s : keeping index  %s" % (ii, goodindex))
print("%s : corresponds to file  %s" % (ii, goodfilename))
for jj in completefilenames[ii]:
    if alreadymadedir == 0:
        print(mkdircommand)
        #this will give a harmless error if the dir already exists
        os.system(mkdircommand)
        # Remember that the directory exists so mkdir runs only once.
        alreadymadedir = 1

    s = inputdir + jj
    print("moving  %s" % s)
    if isT3:
        # Grid tools: the destination must name the file explicitly, and
        # the URLs are quoted so the shell passes them through untouched.
        cpcmd = 'lcg-cp --verbose -b -D srmv2 "' + s + '" "' + outputdir + '/' + jj + '"'
        rmcmd = 'srmrm "' + s + '"'
    else:
        cpcmd = 'rfcp ' + s + ' ' + outputdir
        rmcmd = 'rfrm ' + s

    print(cpcmd)
    # Never delete the source unless the copy reported success.
    if os.system(cpcmd) != 0:
        print("WARNING -- copy failed, leaving file in place: %s" % s)
        continue
    print(rmcmd)
    os.system(rmcmd)
    nmoved = nmoved + 1
169 |
|
|
170 |
|
# Finished with the listing: release the handle and delete the scratch file.
f.close()
os.remove(tmpfile)

# Final tally of how many duplicate files were moved.
separator = "----------------------------"
print(separator)
print(" ".join(["I moved this many files: ", str(nmoved)]))
print(separator)
176 |
+ |
|
177 |
+ |
|
178 |
|
|