# CRAB/python/UnserializePHP.py

import types, string

"""
Unserialize class for the PHP RefDB serialization format.

@version v1.0
@author Giovanni Ciraolo

@ From A Project of 
@Scott Hurring; scott at hurring dot com
@copyright Copyright (c) 2005 Scott Hurring
@license http://opensource.org/licenses/gpl-license.php GNU Public License
$Id: unserializePHP.py,v 1.1 2005/05/16 13:51:22 afanfani Exp $

Most recent version can be found at:
http://hurring.com/code/python/phpserialize/

Usage:
# Create an instance of the unserialize engine
u = PHPUnserialize()
# unserialize some string into python data
data = u.unserialize(serialized_string)

Please see README.txt for more information.
"""
class PHPUnserializeError(Exception):
    """
    Error reported while unserializing PHP data.

    Constructing an instance prints the offending text to stdout.
    The class derives from Exception so that instances can also be
    raised and caught like any other exception.

    line = the text to report; it is also stored as the exception message
    """

    def __init__(self, line):
        Exception.__init__(self, line)
        # A single parenthesised argument prints identically on Python 2 and 3.
        print('\nERROR Unserializing: ' + line + '\n')

class PHPUnserialize(object):
    """
    Class to unserialize something from the PHP Serialize format.

    Supported tokens (the type letter is matched case-insensitively):
        i:<digits>;                                  => int
        d:<number>;                                  => float
        b:<0|1>;                                     => bool
        N;                                           => None
        s:<length>:"<chars>";                        => str
        a:<count>:{<key><value>...}                  => dict
        O:<length>:"<class>":<count>:{<key><value>...} => dict
                                                     (class name is discarded)

    Usage:
    u = PHPUnserialize()
    data = u.unserialize(serialized_string)
    """

    def __init__(self):
        pass

    def unserialize(self, data):
        """
        Unserialize the token that starts at the beginning of 'data'
        and return the resulting Python value.
        """
        return self._unserialize(data, 0)[2]

    def _unserialize(self, data, offset=0):
        """
        Find the next token and unserialize it.
        Recurse on array and object.

        offset = raw offset from start of data

        Returns (dtype, chars_consumed, value) where dtype is the
        lower-cased type letter and chars_consumed is the length of the
        whole token, so the next token starts at offset + chars_consumed.

        Raises Exception for an unknown type letter, a missing terminator
        or a string shorter than its declared length.
        """
        dtype = data[offset:offset+1].lower()

        # 't:' = 2 chars
        dataoffset = offset + 2

        # int => Integer
        if dtype == 'i':
            (chars, raw) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = int(raw)

        # bool => Boolean
        elif dtype == 'b':
            (chars, raw) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = (int(raw) == 1)

        # double => Floating Point
        elif dtype == 'd':
            (chars, raw) = self.read_until(data, dataoffset, ';')
            # +1 for end semicolon
            dataoffset += chars + 1
            value = float(raw)

        # n => None ('N;' is exactly the 2 chars already counted)
        elif dtype == 'n':
            value = None

        # s => String
        elif dtype == 's':
            (chars, stringlength) = self.read_until(data, dataoffset, ':')
            expected = int(stringlength)
            # +2 for the colon after the length field and the start quote
            dataoffset += chars + 2

            # read_chars starts one char before the offset it is given
            (chars, value) = self.read_chars(data, dataoffset + 1, expected)
            if chars != expected:
                # data ended before the declared number of chars was read
                raise Exception("String length mismatch")
            # +2 for endquote semicolon
            dataoffset += chars + 2

        # array => Dict
        # If you originally serialized a Tuple or List, it will
        # be unserialized as a Dict.  PHP doesn't have tuples or lists,
        # only arrays - so everything has to get converted into an array
        # when serializing and the original type of the array is lost
        elif dtype == 'a':
            # How many keys does this list have?
            (chars, keys) = self.read_until(data, dataoffset, ':')
            # +2 for the colon after the count and the opening brace
            dataoffset += chars + 2

            (chars, value) = self._read_pairs(data, dataoffset, int(keys))
            # +1 for closing brace
            dataoffset += chars + 1

        # object => Dict of its properties
        elif dtype == 'o':
            # Skip the class-name length field and its colon
            (chars, _namelength) = self.read_until(data, dataoffset, ':')
            dataoffset += chars + 1
            # Skip the quoted class name and its colon
            (chars, _classname) = self.read_until(data, dataoffset, ':')
            dataoffset += chars + 1
            # How many properties does this object have?
            (chars, keys) = self.read_until(data, dataoffset, ':')
            # +2 for the colon after the count and the opening brace
            dataoffset += chars + 2

            (chars, value) = self._read_pairs(data, dataoffset, int(keys))
            # +1 for closing brace, whatever the width of the count field
            dataoffset += chars + 1

        # I don't know how to unserialize this
        else:
            raise Exception("Unknown / Unhandled data type (%s)!" % dtype)

        return (dtype, dataoffset - offset, value)

    def _read_pairs(self, data, offset, count):
        """
        Read 'count' consecutive key/value token pairs from data[offset].

        Returns (chars_consumed, dict_of_pairs).
        """
        pairs = {}
        position = offset
        for _ in range(count):
            # Read the key
            (_ktype, kchars, key) = self._unserialize(data, position)
            position += kchars

            # Read value of the key
            (_vtype, vchars, value) = self._unserialize(data, position)
            position += vchars

            pairs[key] = value

        return (position - offset, pairs)

    def read_until(self, data, offset, stopchar):
        """
        Read from data[offset] until you encounter some char 'stopchar'.

        Returns (chars_read, text); the stop char itself is not included.
        Raises Exception("Invalid") when 'stopchar' does not occur at or
        after 'offset'.
        """
        end = data.find(stopchar, offset)
        if end < 0:
            # Consumed all the characters and haven't found the stop char
            raise Exception("Invalid")

        # (chars_read, data)
        return (end - offset, data[offset:end])

    def read_chars(self, data, offset, length):
        """
        Read up to 'length' chars starting at data[offset-1].

        The one-char shift is kept for compatibility: callers pass the
        position of the first wanted char plus one.

        Returns (chars_read, text); chars_read is smaller than 'length'
        when the data ends early.
        """
        if length <= 0:
            return (0, "")

        start = offset - 1
        # Clamp the start so a zero offset cannot wrap around to the end
        text = data[max(start, 0):start + length]

        # (chars_read, data)
        return (len(text), text)


Revision:	1.1
Committed:	Mon Jul 25 14:31:24 2005 UTC (19 years, 9 months ago) by slacapra
Content type:	text/x-python
Branch:	MAIN
CVS Tags:	CRAB_1_5_0_pre3, CRAB_1_5_0_pre2, CRAB_1_4_2, CRAB_1_5_0_pre1, CRAB_1_4_1, CRAB_1_4_1_pre2, CRAB_1_4_1_pre1, CRAB_1_4_0, CRAB_1_4_0_pre4, CRAB_1_4_0_pre3, CRAB_1_4_0_pre2, CRAB_1_4_0_pre1, CRAB_1_3_0, CRAB_1_3_0_pre6, CRAB_1_3_0_pre5, CRAB_1_3_0_pre4, CRAB_1_3_0_pre3, HEAD_20092006, CRAB_1_2_1, CRAB_1_2_0, CRAB_1_2_0_pre9, CRAB_1_2_0_pre8, CRAB_1_2_0_pre7, post_cmssw_integration_20060527, pre_cmssw_integration_20060527, CRAB_1_1_0, CRAB_1_1_0_pre4, CRAB_1_1_0_pre3, CRAB_1_1_0_pre1, CRAB_1_0_7, CRAB_1_0_7_pre1, CRAB_1_0_6, CRAB_1_0_5, CRAB_1_0_4, CRAB_1_0_3, CRAB_1_0_2, CRAB_0_2_2, CRAB_1_0_1, CRAB_1_0_0_rc1, CRAB_1_0_0_beta4, CRAB_1_0_0_pre1_boss_2, CRAB_1_0_0_pre1_boss, CRAB_1_0_0_pre3, CRAB_1_0_0_pre2, CRAB_1_0_0_pre1
Branch point for:	branch_1_4_1, CRAB_BOSS4_v1, CRAB_BOSS4
Log Message:	too many changes to be listed...
#	User	Rev	Content
1	slacapra	1.1	import types, string
2
3			"""
4			Unserialize class for the PHP RefDB serialization format.
5
6			@version v1.0
7			@@author Giovanni Ciraolo
8
9			@ From A Project of
10			@Scott Hurring; scott at hurring dot com
11			@copyright Copyright (c) 2005 Scott Hurring
12			@license http://opensource.org/licenses/gpl-license.php GNU Public License
13			$Id: unserializePHP.py,v 1.1 2005/05/16 13:51:22 afanfani Exp $
14
15			Most recent version can be found at:
16			http://hurring.com/code/python/phpserialize/
17
18			Usage:
19			# Create an instance of the unserialize engine
20			u = PHPUnserialize()
21			# unserialize some string into python data
22			data = u.unserialize(serialized_string)
23
24			Please see README.txt for more information.
25			"""
26			class PHPUnserializeError:
27			def __init__(self,line):
28			print '\nERROR Unserializing: '+line+'\n'
29			pass
30
31			class PHPUnserialize(object):
32			"""
33			Class to unserialize something from the PHP Serialize format.
34
35			Usage:
36			u = PHPUnserialize()
37			data = u.unserialize(serialized_string)
38			"""
39
40			def __init__(self):
41			pass
42
43			def unserialize(self, data):
44			return self._unserialize(data, 0)[2]
45
46			def _unserialize(self, data, offset=0):
47			"""
48			Find the next token and unserialize it.
49			Recurse on array.
50
51			offset = raw offset from start of data
52			"""
53
54			buf = []
55			dtype = string.lower(data[offset:offset+1])
56
57			#print "# dtype =", dtype
58
59			# 't:' = 2 chars
60			dataoffset = offset + 2
61			typeconvert = lambda x : x
62			chars = datalength = 0
63
64			# int => Integer
65			if dtype == 'i':
66			typeconvert = lambda x : int(x)
67			(chars, readdata) = self.read_until(data, dataoffset, ';')
68			# +1 for end semicolon
69			dataoffset += chars + 1
70			elif dtype == 'o':
71			(chars, keys) = self.read_until(data, dataoffset, ':')
72			dataoffset += chars + 1
73			(chars, keys) = self.read_until(data, dataoffset, ':')
74			dataoffset += chars + 1
75			(chars, keys) = self.read_until(data, dataoffset, ':')
76			#print chars,keys
77			dataoffset += chars + 2
78			readdata = {}
79			for i in range(0, int(keys)):
80			(ktype, kchars, key) = self._unserialize(data, dataoffset)
81			dataoffset += kchars
82			# print "Key(%i) = (%s, %i, %s) %i" % (i, ktype, kchars, key, dataoffset)
83
84			# Read value of the key
85			(vtype, vchars, value) = self._unserialize(data, dataoffset)
86			dataoffset += vchars
87			# print "Value(%i) = (%s, %i, %s) %i" % (i, vtype, vchars, value, dataoffset)
88
89			# Set the list element
90			readdata[key] = value
91
92
93			# +1 for end semicolon
94
95			dataoffset += chars - 1
96			# bool => Boolean
97			elif dtype == 'b':
98			typeconvert = lambda x : (int(x) == 1)
99			(chars, readdata) = self.read_until(data, dataoffset, ';')
100			# +1 for end semicolon
101			dataoffset += chars + 1
102
103			# double => Floating Point
104			elif dtype == 'd':
105			typeconvert = lambda x : float(x)
106			(chars, readdata) = self.read_until(data, dataoffset, ';')
107			# +1 for end semicolon
108			dataoffset += chars + 1
109
110			# n => None
111			elif dtype == 'n':
112			readdata = None
113
114			# s => String
115			elif dtype == 's':
116			(chars, stringlength) = self.read_until(data, dataoffset, ':')
117			# +2 for colons around length field
118			dataoffset += chars + 2
119
120			# +1 for start quote
121			(chars, readdata) = self.read_chars(data, dataoffset+1, int(stringlength))
122			# +2 for endquote semicolon
123			dataoffset += chars + 2
124
125			if chars != int(stringlength) != int(readdata):
126			raise Exception("String length mismatch")
127
128			# array => Dict
129			# If you originally serialized a Tuple or List, it will
130			# be unserialized as a Dict. PHP doesn't have tuples or lists,
131			# only arrays - so everything has to get converted into an array
132			# when serializing and the original type of the array is lost
133			elif dtype == 'a':
134			readdata = {}
135
136			# How many keys does this list have?
137			(chars, keys) = self.read_until(data, dataoffset, ':')
138			# +2 for colons around length field
139			dataoffset += chars + 2
140
141			# Loop through and fetch this number of key/value pairs
142			for i in range(0, int(keys)):
143			# Read the key
144			(ktype, kchars, key) = self._unserialize(data, dataoffset)
145			dataoffset += kchars
146			#print "Key(%i) = (%s, %i, %s) %i" % (i, ktype, kchars, key, dataoffset)
147
148			# Read value of the key
149			(vtype, vchars, value) = self._unserialize(data, dataoffset)
150			dataoffset += vchars
151			#print "Value(%i) = (%s, %i, %s) %i" % (i, vtype, vchars, value, dataoffset)
152
153			# Set the list element
154			readdata[key] = value
155
156			# +1 for end semicolon
157			dataoffset += 1
158			#chars = int(dataoffset) - start
159
160			# I don't know how to unserialize this
161
162
163			else:
164			raise Exception("Unknown / Unhandled data type (%s)!" % dtype)
165
166
167			return (dtype, dataoffset-offset, typeconvert(readdata))
168
169			def read_until(self, data, offset, stopchar):
170			"""
171			Read from data[offset] until you encounter some char 'stopchar'.
172			"""
173			buf = []
174			char = data[offset:offset+1]
175			i = 2
176			while char != stopchar:
177			# Consumed all the characters and havent found ';'
178			if i+offset > len(data):
179			raise Exception("Invalid")
180			buf.append(char)
181			char = data[offset+(i-1):offset+i]
182			i += 1
183
184			# (chars_read, data)
185			return (len(buf), "".join(buf))
186
187			def read_chars(self, data, offset, length):
188			"""
189			Read 'length' number of chars from data[offset].
190			"""
191			buf = []
192			# Account for the starting quote char
193			#offset += 1
194			for i in range(0, length):
195			char = data[offset+(i-1):offset+i]
196			buf.append(char)
197
198			# (chars_read, data)
199			return (len(buf), "".join(buf))
200
201