1 |
/*************************************************************************
|
2 |
* Name: rle.c
|
3 |
* Author: Marcus Geelnard
|
4 |
* Description: RLE coder/decoder implementation.
|
5 |
* Reentrant: Yes
|
6 |
*
|
7 |
* RLE (Run Length Encoding) is the simplest possible lossless compression
|
8 |
* method. Nevertheless it serves a purpose, even in state of the art
|
9 |
* compression (it is used in JPEG compression, for instance). The basic
|
10 |
* principle is to identify sequences of equal bytes, and replace them with
|
11 |
* the byte in question and a repetition count (coded in some clever
|
12 |
* fashion).
|
13 |
*
|
14 |
* There are several different ways to do RLE. The particular method
|
15 |
* implemented here is a very efficient one. Instead of coding runs for
|
16 |
* both repeating and non-repeating sections, a special marker byte is
|
17 |
* used to indicate the start of a repeating section. Non-repeating
|
18 |
* sections can thus have any length without being interrupted by control
|
19 |
* bytes, except for the rare case when the special marker byte appears in
|
20 |
* the non-repeating section (which is coded with at most two bytes). For
|
21 |
* optimal efficiency, the marker byte is chosen as the least frequent
|
22 |
* (perhaps even non-existent) symbol in the input stream.
|
23 |
*
|
24 |
* Repeating runs can be as long as 32768 bytes. Runs shorter than 129
|
25 |
* bytes require three bytes for coding (marker + count + symbol), whereas
|
26 |
* runs longer than 128 bytes require four bytes for coding (marker +
|
27 |
* counthi|0x80 + countlo + symbol). This is normally a win in compression,
|
28 |
* and it's very seldom a loss of compression ratio compared to using a
|
29 |
* fixed coding of three bytes (which allows coding a run of 256 bytes in
|
30 |
* just three bytes).
|
31 |
*
|
32 |
* With this scheme, the worst case compression result is
|
33 |
* (257/256)*insize + 1.
|
34 |
*
|
35 |
*-------------------------------------------------------------------------
|
36 |
* Note: This code is based on the code found in "codrle2.c" and
|
37 |
* "dcodrle2.c" by David Bourgin, as described in "Introduction to the
|
38 |
* losslessy compression schemes", 1994. The main differences from Davids
|
39 |
* implementation are the addition of long (15-bit) run counts, the removal
|
40 |
* of file I/O (this implementation works solely with preallocated memory
|
41 |
* buffers), and that the code is now 100% reentrant.
|
42 |
*-------------------------------------------------------------------------
|
43 |
* Copyright (c) 2003-2006 Marcus Geelnard
|
44 |
*
|
45 |
* This software is provided 'as-is', without any express or implied
|
46 |
* warranty. In no event will the authors be held liable for any damages
|
47 |
* arising from the use of this software.
|
48 |
*
|
49 |
* Permission is granted to anyone to use this software for any purpose,
|
50 |
* including commercial applications, and to alter it and redistribute it
|
51 |
* freely, subject to the following restrictions:
|
52 |
*
|
53 |
* 1. The origin of this software must not be misrepresented; you must not
|
54 |
* claim that you wrote the original software. If you use this software
|
55 |
* in a product, an acknowledgment in the product documentation would
|
56 |
* be appreciated but is not required.
|
57 |
*
|
58 |
* 2. Altered source versions must be plainly marked as such, and must not
|
59 |
* be misrepresented as being the original software.
|
60 |
*
|
61 |
* 3. This notice may not be removed or altered from any source
|
62 |
* distribution.
|
63 |
*
|
64 |
* Marcus Geelnard
|
65 |
* marcus.geelnard at home.se
|
66 |
*************************************************************************/
|
67 |
|
68 |
|
69 |
|
70 |
/*************************************************************************
|
71 |
* INTERNAL FUNCTIONS *
|
72 |
*************************************************************************/
|
73 |
|
74 |
|
75 |
/*************************************************************************
|
76 |
* _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
|
77 |
* times.
|
78 |
*************************************************************************/
|
79 |
|
80 |
static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
|
81 |
unsigned char marker, unsigned char symbol, unsigned int count )
|
82 |
{
|
83 |
unsigned int i, idx;
|
84 |
|
85 |
idx = *outpos;
|
86 |
if( count <= 3 )
|
87 |
{
|
88 |
if( symbol == marker )
|
89 |
{
|
90 |
out[ idx ++ ] = marker;
|
91 |
out[ idx ++ ] = count-1;
|
92 |
}
|
93 |
else
|
94 |
{
|
95 |
for( i = 0; i < count; ++ i )
|
96 |
{
|
97 |
out[ idx ++ ] = symbol;
|
98 |
}
|
99 |
}
|
100 |
}
|
101 |
else
|
102 |
{
|
103 |
out[ idx ++ ] = marker;
|
104 |
-- count;
|
105 |
if( count >= 128 )
|
106 |
{
|
107 |
out[ idx ++ ] = (count >> 8) | 0x80;
|
108 |
}
|
109 |
out[ idx ++ ] = count & 0xff;
|
110 |
out[ idx ++ ] = symbol;
|
111 |
}
|
112 |
*outpos = idx;
|
113 |
}
|
114 |
|
115 |
|
116 |
/*************************************************************************
|
117 |
* _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
|
118 |
* is the marker symbol, and special care has to be taken for the case
|
119 |
* when 'symbol' == 'marker'.
|
120 |
*************************************************************************/
|
121 |
|
122 |
static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
|
123 |
unsigned char marker, unsigned char symbol )
|
124 |
{
|
125 |
unsigned int idx;
|
126 |
|
127 |
idx = *outpos;
|
128 |
if( symbol == marker )
|
129 |
{
|
130 |
out[ idx ++ ] = marker;
|
131 |
out[ idx ++ ] = 0;
|
132 |
}
|
133 |
else
|
134 |
{
|
135 |
out[ idx ++ ] = symbol;
|
136 |
}
|
137 |
*outpos = idx;
|
138 |
}
|
139 |
|
140 |
|
141 |
|
142 |
/*************************************************************************
|
143 |
* PUBLIC FUNCTIONS *
|
144 |
*************************************************************************/
|
145 |
|
146 |
|
147 |
/*************************************************************************
|
148 |
* RLE_Compress() - Compress a block of data using an RLE coder.
|
149 |
* in - Input (uncompressed) buffer.
|
150 |
* out - Output (compressed) buffer. This buffer must be 0.4% larger
|
151 |
* than the input buffer, plus one byte.
|
152 |
* insize - Number of input bytes.
|
153 |
* The function returns the size of the compressed data.
|
154 |
*************************************************************************/
|
155 |
|
156 |
int RLE_Compress( unsigned char *in, unsigned char *out,
|
157 |
unsigned int insize )
|
158 |
{
|
159 |
unsigned char byte1, byte2, marker;
|
160 |
unsigned int inpos, outpos, count, i, histogram[ 256 ];
|
161 |
|
162 |
/* Do we have anything to compress? */
|
163 |
if( insize < 1 )
|
164 |
{
|
165 |
return 0;
|
166 |
}
|
167 |
|
168 |
/* Create histogram */
|
169 |
for( i = 0; i < 256; ++ i )
|
170 |
{
|
171 |
histogram[ i ] = 0;
|
172 |
}
|
173 |
for( i = 0; i < insize; ++ i )
|
174 |
{
|
175 |
++ histogram[ in[ i ] ];
|
176 |
}
|
177 |
|
178 |
/* Find the least common byte, and use it as the repetition marker */
|
179 |
marker = 0;
|
180 |
for( i = 1; i < 256; ++ i )
|
181 |
{
|
182 |
if( histogram[ i ] < histogram[ marker ] )
|
183 |
{
|
184 |
marker = i;
|
185 |
}
|
186 |
}
|
187 |
|
188 |
/* Remember the repetition marker for the decoder */
|
189 |
out[ 0 ] = marker;
|
190 |
outpos = 1;
|
191 |
|
192 |
/* Start of compression */
|
193 |
byte1 = in[ 0 ];
|
194 |
inpos = 1;
|
195 |
count = 1;
|
196 |
|
197 |
/* Are there at least two bytes? */
|
198 |
if( insize >= 2 )
|
199 |
{
|
200 |
byte2 = in[ inpos ++ ];
|
201 |
count = 2;
|
202 |
|
203 |
/* Main compression loop */
|
204 |
do
|
205 |
{
|
206 |
if( byte1 == byte2 )
|
207 |
{
|
208 |
/* Do we meet only a sequence of identical bytes? */
|
209 |
while( (inpos < insize) && (byte1 == byte2) &&
|
210 |
(count < 32768) )
|
211 |
{
|
212 |
byte2 = in[ inpos ++ ];
|
213 |
++ count;
|
214 |
}
|
215 |
if( byte1 == byte2 )
|
216 |
{
|
217 |
_RLE_WriteRep( out, &outpos, marker, byte1, count );
|
218 |
if( inpos < insize )
|
219 |
{
|
220 |
byte1 = in[ inpos ++ ];
|
221 |
count = 1;
|
222 |
}
|
223 |
else
|
224 |
{
|
225 |
count = 0;
|
226 |
}
|
227 |
}
|
228 |
else
|
229 |
{
|
230 |
_RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
|
231 |
byte1 = byte2;
|
232 |
count = 1;
|
233 |
}
|
234 |
}
|
235 |
else
|
236 |
{
|
237 |
/* No, then don't handle the last byte */
|
238 |
_RLE_WriteNonRep( out, &outpos, marker, byte1 );
|
239 |
byte1 = byte2;
|
240 |
count = 1;
|
241 |
}
|
242 |
if( inpos < insize )
|
243 |
{
|
244 |
byte2 = in[ inpos ++ ];
|
245 |
count = 2;
|
246 |
}
|
247 |
}
|
248 |
while( (inpos < insize) || (count >= 2) );
|
249 |
}
|
250 |
|
251 |
/* One byte left? */
|
252 |
if( count == 1 )
|
253 |
{
|
254 |
_RLE_WriteNonRep( out, &outpos, marker, byte1 );
|
255 |
}
|
256 |
|
257 |
return outpos;
|
258 |
}
|
259 |
|
260 |
|
261 |
/*************************************************************************
|
262 |
* RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
|
263 |
* in - Input (compressed) buffer.
|
264 |
* out - Output (uncompressed) buffer. This buffer must be large
|
265 |
* enough to hold the uncompressed data.
|
266 |
* insize - Number of input bytes.
|
267 |
*************************************************************************/
|
268 |
|
269 |
void RLE_Uncompress( unsigned char *in, unsigned char *out,
|
270 |
unsigned int insize )
|
271 |
{
|
272 |
unsigned char marker, symbol;
|
273 |
unsigned int i, inpos, outpos, count;
|
274 |
|
275 |
/* Do we have anything to uncompress? */
|
276 |
if( insize < 1 )
|
277 |
{
|
278 |
return;
|
279 |
}
|
280 |
|
281 |
/* Get marker symbol from input stream */
|
282 |
inpos = 0;
|
283 |
marker = in[ inpos ++ ];
|
284 |
|
285 |
/* Main decompression loop */
|
286 |
outpos = 0;
|
287 |
do
|
288 |
{
|
289 |
symbol = in[ inpos ++ ];
|
290 |
if( symbol == marker )
|
291 |
{
|
292 |
/* We had a marker byte */
|
293 |
count = in[ inpos ++ ];
|
294 |
if( count <= 2 )
|
295 |
{
|
296 |
/* Counts 0, 1 and 2 are used for marker byte repetition
|
297 |
only */
|
298 |
for( i = 0; i <= count; ++ i )
|
299 |
{
|
300 |
out[ outpos ++ ] = marker;
|
301 |
}
|
302 |
}
|
303 |
else
|
304 |
{
|
305 |
if( count & 0x80 )
|
306 |
{
|
307 |
count = ((count & 0x7f) << 8) + in[ inpos ++ ];
|
308 |
}
|
309 |
symbol = in[ inpos ++ ];
|
310 |
for( i = 0; i <= count; ++ i )
|
311 |
{
|
312 |
out[ outpos ++ ] = symbol;
|
313 |
}
|
314 |
}
|
315 |
}
|
316 |
else
|
317 |
{
|
318 |
/* No marker, plain copy */
|
319 |
out[ outpos ++ ] = symbol;
|
320 |
}
|
321 |
}
|
322 |
while( inpos < insize );
|
323 |
}
|