1 |
loizides |
1.1 |
/*************************************************************************
|
2 |
|
|
* Name: rle.c
|
3 |
|
|
* Author: Marcus Geelnard
|
4 |
|
|
* Description: RLE coder/decoder implementation.
|
5 |
|
|
* Reentrant: Yes
|
6 |
|
|
*
|
7 |
|
|
* RLE (Run Length Encoding) is the simplest possible lossless compression
|
8 |
|
|
* method. Nevertheless it serves a purpose, even in state of the art
|
9 |
|
|
* compression (it is used in JPEG compression, for instance). The basic
|
10 |
|
|
* principle is to identify sequences of equal bytes, and replace them with
|
11 |
|
|
* the byte in question and a repetition count (coded in some clever
|
12 |
|
|
* fashion).
|
13 |
|
|
*
|
14 |
|
|
* There are several different ways to do RLE. The particular method
|
15 |
|
|
* implemented here is a very efficient one. Instead of coding runs for
|
16 |
|
|
* both repeating and non-repeating sections, a special marker byte is
|
17 |
|
|
* used to indicate the start of a repeating section. Non-repeating
|
18 |
|
|
* sections can thus have any length without being interrupted by control
|
19 |
|
|
* bytes, except for the rare case when the special marker byte appears in
|
20 |
|
|
* the non-repeating section (which is coded with at most two bytes). For
|
21 |
|
|
* optimal efficiency, the marker byte is chosen as the least frequent
|
22 |
|
|
* (perhaps even non-existent) symbol in the input stream.
|
23 |
|
|
*
|
24 |
|
|
* Repeating runs can be as long as 32768 bytes. Runs shorter than 129
|
25 |
|
|
* bytes require three bytes for coding (marker + count + symbol), whereas
|
26 |
|
|
* runs longer than 128 bytes require four bytes for coding (marker +
|
27 |
|
|
* counthi|0x80 + countlo + symbol). This is normally a win in compression,
|
28 |
|
|
* and it's very seldom a loss of compression ratio compared to using a
|
29 |
|
|
* fixed coding of three bytes (which allows coding a run of 256 bytes in
|
30 |
|
|
* just three bytes).
|
31 |
|
|
*
|
32 |
|
|
* With this scheme, the worst case compression result is
|
33 |
|
|
* (257/256)*insize + 1.
|
34 |
|
|
*
|
35 |
|
|
*-------------------------------------------------------------------------
|
36 |
|
|
* Note: This code is based on the code found in "codrle2.c" and
|
37 |
|
|
* "dcodrle2.c" by David Bourgin, as described in "Introduction to the
|
38 |
|
|
* losslessy compression schemes", 1994. The main differences from Davids
|
39 |
|
|
* implementation are the addition of long (15-bit) run counts, the removal
|
40 |
|
|
* of file I/O (this implementation works solely with preallocated memory
|
41 |
|
|
* buffers), and that the code is now 100% reentrant.
|
42 |
|
|
*-------------------------------------------------------------------------
|
43 |
|
|
* Copyright (c) 2003-2006 Marcus Geelnard
|
44 |
|
|
*
|
45 |
|
|
* This software is provided 'as-is', without any express or implied
|
46 |
|
|
* warranty. In no event will the authors be held liable for any damages
|
47 |
|
|
* arising from the use of this software.
|
48 |
|
|
*
|
49 |
|
|
* Permission is granted to anyone to use this software for any purpose,
|
50 |
|
|
* including commercial applications, and to alter it and redistribute it
|
51 |
|
|
* freely, subject to the following restrictions:
|
52 |
|
|
*
|
53 |
|
|
* 1. The origin of this software must not be misrepresented; you must not
|
54 |
|
|
* claim that you wrote the original software. If you use this software
|
55 |
|
|
* in a product, an acknowledgment in the product documentation would
|
56 |
|
|
* be appreciated but is not required.
|
57 |
|
|
*
|
58 |
|
|
* 2. Altered source versions must be plainly marked as such, and must not
|
59 |
|
|
* be misrepresented as being the original software.
|
60 |
|
|
*
|
61 |
|
|
* 3. This notice may not be removed or altered from any source
|
62 |
|
|
* distribution.
|
63 |
|
|
*
|
64 |
|
|
* Marcus Geelnard
|
65 |
|
|
* marcus.geelnard at home.se
|
66 |
|
|
*************************************************************************/
|
67 |
|
|
|
68 |
|
|
|
69 |
|
|
|
70 |
|
|
/*************************************************************************
|
71 |
|
|
* INTERNAL FUNCTIONS *
|
72 |
|
|
*************************************************************************/
|
73 |
|
|
|
74 |
|
|
|
75 |
|
|
/*************************************************************************
|
76 |
|
|
* _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
|
77 |
|
|
* times.
|
78 |
|
|
*************************************************************************/
|
79 |
|
|
|
80 |
|
|
static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
|
81 |
|
|
unsigned char marker, unsigned char symbol, unsigned int count )
|
82 |
|
|
{
|
83 |
|
|
unsigned int i, idx;
|
84 |
|
|
|
85 |
|
|
idx = *outpos;
|
86 |
|
|
if( count <= 3 )
|
87 |
|
|
{
|
88 |
|
|
if( symbol == marker )
|
89 |
|
|
{
|
90 |
|
|
out[ idx ++ ] = marker;
|
91 |
|
|
out[ idx ++ ] = count-1;
|
92 |
|
|
}
|
93 |
|
|
else
|
94 |
|
|
{
|
95 |
|
|
for( i = 0; i < count; ++ i )
|
96 |
|
|
{
|
97 |
|
|
out[ idx ++ ] = symbol;
|
98 |
|
|
}
|
99 |
|
|
}
|
100 |
|
|
}
|
101 |
|
|
else
|
102 |
|
|
{
|
103 |
|
|
out[ idx ++ ] = marker;
|
104 |
|
|
-- count;
|
105 |
|
|
if( count >= 128 )
|
106 |
|
|
{
|
107 |
|
|
out[ idx ++ ] = (count >> 8) | 0x80;
|
108 |
|
|
}
|
109 |
|
|
out[ idx ++ ] = count & 0xff;
|
110 |
|
|
out[ idx ++ ] = symbol;
|
111 |
|
|
}
|
112 |
|
|
*outpos = idx;
|
113 |
|
|
}
|
114 |
|
|
|
115 |
|
|
|
116 |
|
|
/*************************************************************************
|
117 |
|
|
* _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
|
118 |
|
|
* is the marker symbol, and special care has to be taken for the case
|
119 |
|
|
* when 'symbol' == 'marker'.
|
120 |
|
|
*************************************************************************/
|
121 |
|
|
|
122 |
|
|
static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
|
123 |
|
|
unsigned char marker, unsigned char symbol )
|
124 |
|
|
{
|
125 |
|
|
unsigned int idx;
|
126 |
|
|
|
127 |
|
|
idx = *outpos;
|
128 |
|
|
if( symbol == marker )
|
129 |
|
|
{
|
130 |
|
|
out[ idx ++ ] = marker;
|
131 |
|
|
out[ idx ++ ] = 0;
|
132 |
|
|
}
|
133 |
|
|
else
|
134 |
|
|
{
|
135 |
|
|
out[ idx ++ ] = symbol;
|
136 |
|
|
}
|
137 |
|
|
*outpos = idx;
|
138 |
|
|
}
|
139 |
|
|
|
140 |
|
|
|
141 |
|
|
|
142 |
|
|
/*************************************************************************
|
143 |
|
|
* PUBLIC FUNCTIONS *
|
144 |
|
|
*************************************************************************/
|
145 |
|
|
|
146 |
|
|
|
147 |
|
|
/*************************************************************************
|
148 |
|
|
* RLE_Compress() - Compress a block of data using an RLE coder.
|
149 |
|
|
* in - Input (uncompressed) buffer.
|
150 |
|
|
* out - Output (compressed) buffer. This buffer must be 0.4% larger
|
151 |
|
|
* than the input buffer, plus one byte.
|
152 |
|
|
* insize - Number of input bytes.
|
153 |
|
|
* The function returns the size of the compressed data.
|
154 |
|
|
*************************************************************************/
|
155 |
|
|
|
156 |
|
|
int RLE_Compress( unsigned char *in, unsigned char *out,
|
157 |
|
|
unsigned int insize )
|
158 |
|
|
{
|
159 |
|
|
unsigned char byte1, byte2, marker;
|
160 |
|
|
unsigned int inpos, outpos, count, i, histogram[ 256 ];
|
161 |
|
|
|
162 |
|
|
/* Do we have anything to compress? */
|
163 |
|
|
if( insize < 1 )
|
164 |
|
|
{
|
165 |
|
|
return 0;
|
166 |
|
|
}
|
167 |
|
|
|
168 |
|
|
/* Create histogram */
|
169 |
|
|
for( i = 0; i < 256; ++ i )
|
170 |
|
|
{
|
171 |
|
|
histogram[ i ] = 0;
|
172 |
|
|
}
|
173 |
|
|
for( i = 0; i < insize; ++ i )
|
174 |
|
|
{
|
175 |
|
|
++ histogram[ in[ i ] ];
|
176 |
|
|
}
|
177 |
|
|
|
178 |
|
|
/* Find the least common byte, and use it as the repetition marker */
|
179 |
|
|
marker = 0;
|
180 |
|
|
for( i = 1; i < 256; ++ i )
|
181 |
|
|
{
|
182 |
|
|
if( histogram[ i ] < histogram[ marker ] )
|
183 |
|
|
{
|
184 |
|
|
marker = i;
|
185 |
|
|
}
|
186 |
|
|
}
|
187 |
|
|
|
188 |
|
|
/* Remember the repetition marker for the decoder */
|
189 |
|
|
out[ 0 ] = marker;
|
190 |
|
|
outpos = 1;
|
191 |
|
|
|
192 |
|
|
/* Start of compression */
|
193 |
|
|
byte1 = in[ 0 ];
|
194 |
|
|
inpos = 1;
|
195 |
|
|
count = 1;
|
196 |
|
|
|
197 |
|
|
/* Are there at least two bytes? */
|
198 |
|
|
if( insize >= 2 )
|
199 |
|
|
{
|
200 |
|
|
byte2 = in[ inpos ++ ];
|
201 |
|
|
count = 2;
|
202 |
|
|
|
203 |
|
|
/* Main compression loop */
|
204 |
|
|
do
|
205 |
|
|
{
|
206 |
|
|
if( byte1 == byte2 )
|
207 |
|
|
{
|
208 |
|
|
/* Do we meet only a sequence of identical bytes? */
|
209 |
|
|
while( (inpos < insize) && (byte1 == byte2) &&
|
210 |
|
|
(count < 32768) )
|
211 |
|
|
{
|
212 |
|
|
byte2 = in[ inpos ++ ];
|
213 |
|
|
++ count;
|
214 |
|
|
}
|
215 |
|
|
if( byte1 == byte2 )
|
216 |
|
|
{
|
217 |
|
|
_RLE_WriteRep( out, &outpos, marker, byte1, count );
|
218 |
|
|
if( inpos < insize )
|
219 |
|
|
{
|
220 |
|
|
byte1 = in[ inpos ++ ];
|
221 |
|
|
count = 1;
|
222 |
|
|
}
|
223 |
|
|
else
|
224 |
|
|
{
|
225 |
|
|
count = 0;
|
226 |
|
|
}
|
227 |
|
|
}
|
228 |
|
|
else
|
229 |
|
|
{
|
230 |
|
|
_RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
|
231 |
|
|
byte1 = byte2;
|
232 |
|
|
count = 1;
|
233 |
|
|
}
|
234 |
|
|
}
|
235 |
|
|
else
|
236 |
|
|
{
|
237 |
|
|
/* No, then don't handle the last byte */
|
238 |
|
|
_RLE_WriteNonRep( out, &outpos, marker, byte1 );
|
239 |
|
|
byte1 = byte2;
|
240 |
|
|
count = 1;
|
241 |
|
|
}
|
242 |
|
|
if( inpos < insize )
|
243 |
|
|
{
|
244 |
|
|
byte2 = in[ inpos ++ ];
|
245 |
|
|
count = 2;
|
246 |
|
|
}
|
247 |
|
|
}
|
248 |
|
|
while( (inpos < insize) || (count >= 2) );
|
249 |
|
|
}
|
250 |
|
|
|
251 |
|
|
/* One byte left? */
|
252 |
|
|
if( count == 1 )
|
253 |
|
|
{
|
254 |
|
|
_RLE_WriteNonRep( out, &outpos, marker, byte1 );
|
255 |
|
|
}
|
256 |
|
|
|
257 |
|
|
return outpos;
|
258 |
|
|
}
|
259 |
|
|
|
260 |
|
|
|
261 |
|
|
/*************************************************************************
|
262 |
|
|
* RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
|
263 |
|
|
* in - Input (compressed) buffer.
|
264 |
|
|
* out - Output (uncompressed) buffer. This buffer must be large
|
265 |
|
|
* enough to hold the uncompressed data.
|
266 |
|
|
* insize - Number of input bytes.
|
267 |
|
|
*************************************************************************/
|
268 |
|
|
|
269 |
|
|
void RLE_Uncompress( unsigned char *in, unsigned char *out,
|
270 |
|
|
unsigned int insize )
|
271 |
|
|
{
|
272 |
|
|
unsigned char marker, symbol;
|
273 |
|
|
unsigned int i, inpos, outpos, count;
|
274 |
|
|
|
275 |
|
|
/* Do we have anything to uncompress? */
|
276 |
|
|
if( insize < 1 )
|
277 |
|
|
{
|
278 |
|
|
return;
|
279 |
|
|
}
|
280 |
|
|
|
281 |
|
|
/* Get marker symbol from input stream */
|
282 |
|
|
inpos = 0;
|
283 |
|
|
marker = in[ inpos ++ ];
|
284 |
|
|
|
285 |
|
|
/* Main decompression loop */
|
286 |
|
|
outpos = 0;
|
287 |
|
|
do
|
288 |
|
|
{
|
289 |
|
|
symbol = in[ inpos ++ ];
|
290 |
|
|
if( symbol == marker )
|
291 |
|
|
{
|
292 |
|
|
/* We had a marker byte */
|
293 |
|
|
count = in[ inpos ++ ];
|
294 |
|
|
if( count <= 2 )
|
295 |
|
|
{
|
296 |
|
|
/* Counts 0, 1 and 2 are used for marker byte repetition
|
297 |
|
|
only */
|
298 |
|
|
for( i = 0; i <= count; ++ i )
|
299 |
|
|
{
|
300 |
|
|
out[ outpos ++ ] = marker;
|
301 |
|
|
}
|
302 |
|
|
}
|
303 |
|
|
else
|
304 |
|
|
{
|
305 |
|
|
if( count & 0x80 )
|
306 |
|
|
{
|
307 |
|
|
count = ((count & 0x7f) << 8) + in[ inpos ++ ];
|
308 |
|
|
}
|
309 |
|
|
symbol = in[ inpos ++ ];
|
310 |
|
|
for( i = 0; i <= count; ++ i )
|
311 |
|
|
{
|
312 |
|
|
out[ outpos ++ ] = symbol;
|
313 |
|
|
}
|
314 |
|
|
}
|
315 |
|
|
}
|
316 |
|
|
else
|
317 |
|
|
{
|
318 |
|
|
/* No marker, plain copy */
|
319 |
|
|
out[ outpos ++ ] = symbol;
|
320 |
|
|
}
|
321 |
|
|
}
|
322 |
|
|
while( inpos < insize );
|
323 |
|
|
}
|