1 |
slacapra |
1.1 |
import types, string
|
2 |
|
|
|
3 |
|
|
"""
|
4 |
|
|
Unserialize class for the PHP RefDB serialization format.
|
5 |
|
|
|
6 |
|
|
@version v1.0
|
7 |
|
|
@author Giovanni Ciraolo
|
8 |
|
|
|
9 |
|
|
@ From A Project of
|
10 |
|
|
@Scott Hurring; scott at hurring dot com
|
11 |
|
|
@copyright Copyright (c) 2005 Scott Hurring
|
12 |
|
|
@license http://opensource.org/licenses/gpl-license.php GNU Public License
|
13 |
|
|
$Id: unserializePHP.py,v 1.1 2005/05/16 13:51:22 afanfani Exp $
|
14 |
|
|
|
15 |
|
|
Most recent version can be found at:
|
16 |
|
|
http://hurring.com/code/python/phpserialize/
|
17 |
|
|
|
18 |
|
|
Usage:
|
19 |
|
|
# Create an instance of the unserialize engine
|
20 |
|
|
u = PHPUnserialize()
|
21 |
|
|
# unserialize some string into python data
|
22 |
|
|
data = u.unserialize(serialized_string)
|
23 |
|
|
|
24 |
|
|
Please see README.txt for more information.
|
25 |
|
|
"""
|
26 |
|
|
class PHPUnserializeError(Exception):
    """
    Error reported while unserializing PHP data.

    Creating an instance prints the offending line to stdout.  The class
    derives from Exception so that instances can be raised and caught
    like any other error; the line is also available as ``args[0]``.
    """

    def __init__(self, line):
        """
        line = text describing what could not be unserialized (a string)
        """
        Exception.__init__(self, line)
        # A single parenthesised argument prints identically under the
        # Python 2 print statement and the Python 3 print function.
        print('\nERROR Unserializing: ' + line + '\n')
|
30 |
|
|
|
31 |
|
|
class PHPUnserialize(object):
    """
    Class to unserialize something from the PHP Serialize format.

    Handled tokens (the type letter is matched case-insensitively):
        i => int, b => bool, d => float, n => None, s => str,
        a => dict, o => dict of the object's properties (the class
        name is read but discarded).

    Usage:
    u = PHPUnserialize()
    data = u.unserialize(serialized_string)
    """

    def __init__(self):
        pass

    def unserialize(self, data):
        """
        Unserialize the token that starts at the beginning of 'data'
        and return the resulting python value.
        """
        return self._unserialize(data, 0)[2]

    def _unserialize(self, data, offset=0):
        """
        Find the next token and unserialize it.
        Recurse on array and object.

        offset = raw offset from start of data

        Returns a tuple (dtype, chars_consumed, value) where dtype is the
        lower-cased type letter and chars_consumed is the length of the
        whole token, so that the caller can step to the next one.

        Raises ValueError on an unknown type letter, a missing delimiter,
        a malformed number or a string shorter than its declared length.
        """
        # str.lower() rather than string.lower(): the module-level
        # function does not exist on Python 3.
        dtype = data[offset:offset+1].lower()

        # 't:' = 2 chars
        dataoffset = offset + 2

        # int => Integer
        if dtype == 'i':
            (chars, raw) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = int(raw)

        # bool => Boolean
        elif dtype == 'b':
            (chars, raw) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = (int(raw) == 1)

        # double => Floating Point
        elif dtype == 'd':
            (chars, raw) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = float(raw)

        # n => None ('N;' is exactly the two chars already skipped)
        elif dtype == 'n':
            value = None

        # s => String
        elif dtype == 's':
            (chars, raw) = self.read_until(data, dataoffset, ':')
            stringlength = int(raw)
            # +2 for the colon after the length field and the start quote
            dataoffset += chars + 2

            # read_chars starts one char before the offset it is given,
            # hence the +1 to begin at the first char of the string
            (chars, value) = self.read_chars(data, dataoffset + 1, stringlength)

            # Compare against what was really read, so that truncated
            # input is reported instead of silently shortened.
            if len(value) != stringlength:
                raise ValueError("String length mismatch")

            # +2 for endquote semicolon
            dataoffset += stringlength + 2

        # object => Dict of its properties
        # Layout: O:<name length>:"<class name>":<count>:{<pairs>}
        elif dtype == 'o':
            # Skip the class-name length field and its colon
            (chars, _unused) = self.read_until(data, dataoffset, ':')
            dataoffset += chars + 1
            # Skip the quoted class name and its colon
            (chars, _unused) = self.read_until(data, dataoffset, ':')
            dataoffset += chars + 1
            # How many properties does this object have?
            (chars, keys) = self.read_until(data, dataoffset, ':')
            # +2 for the colon and the opening brace
            dataoffset += chars + 2

            (dataoffset, value) = self._read_pairs(data, dataoffset, int(keys))

            # +1 for the closing brace.  This must not depend on the
            # width of the count field, otherwise an object nested in an
            # array leaves the parser sitting on the '}'.
            dataoffset += 1

        # array => Dict
        # If you originally serialized a Tuple or List, it will
        # be unserialized as a Dict.  PHP doesn't have tuples or lists,
        # only arrays - so everything has to get converted into an array
        # when serializing and the original type of the array is lost
        elif dtype == 'a':
            # How many keys does this list have?
            (chars, keys) = self.read_until(data, dataoffset, ':')
            # +2 for the colon and the opening brace
            dataoffset += chars + 2

            (dataoffset, value) = self._read_pairs(data, dataoffset, int(keys))

            # +1 for the closing brace
            dataoffset += 1

        # I don't know how to unserialize this
        else:
            raise ValueError("Unknown / Unhandled data type (%s)!" % dtype)

        return (dtype, dataoffset - offset, value)

    def _read_pairs(self, data, dataoffset, count):
        """
        Read 'count' consecutive key/value tokens starting at
        data[dataoffset].

        Returns (offset_after_last_value, dict_of_pairs).
        """
        pairs = {}
        for _unused in range(count):
            # Read the key
            (ktype, kchars, key) = self._unserialize(data, dataoffset)
            dataoffset += kchars

            # Read value of the key
            (vtype, vchars, value) = self._unserialize(data, dataoffset)
            dataoffset += vchars

            pairs[key] = value
        return (dataoffset, pairs)

    def read_until(self, data, offset, stopchar):
        """
        Read from data[offset] until you encounter some char 'stopchar'.

        Returns (chars_read, text); the stop char itself is not included.
        Raises ValueError if 'stopchar' does not occur at or after offset.
        """
        end = data.find(stopchar, offset)
        if end == -1:
            # Consumed all the characters and haven't found the stop char
            raise ValueError("Invalid: %r not found from offset %d"
                             % (stopchar, offset))
        text = data[offset:end]

        # (chars_read, data)
        return (len(text), text)

    def read_chars(self, data, offset, length):
        """
        Read up to 'length' chars beginning at data[offset - 1].

        The read starts one position before 'offset': callers pass the
        position of the first wanted char plus one.

        Returns (chars_read, text).  chars_read is the number of chars
        actually obtained, which is less than 'length' when data ends
        early.
        """
        # Clamp at 0 so that a start before the beginning of data cannot
        # wrap around to the end of the string.
        start = max(offset - 1, 0)
        end = max(offset - 1 + length, 0)
        text = data[start:end]

        # (chars_read, data)
        return (len(text), text)
|
200 |
|
|
|
201 |
|
|
|