ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitCommon/OptIO/src/rle.c
Revision: 1.1
Committed: Tue Feb 24 11:56:44 2009 UTC (16 years, 2 months ago) by loizides
Content type: text/plain
Branch: MAIN
CVS Tags: Mit_032, Mit_031, Mit_025c_branch2, Mit_025c_branch1, Mit_030, Mit_029c, Mit_030_pre1, Mit_029a, Mit_029, Mit_029_pre1, Mit_028a, Mit_025c_branch0, Mit_028, Mit_027a, Mit_027, Mit_026, Mit_025e, Mit_025d, Mit_025c, Mit_025b, Mit_025a, Mit_025, Mit_025pre2, Mit_024b, Mit_025pre1, Mit_024a, Mit_024, Mit_023, Mit_022a, Mit_022, Mit_020d, TMit_020d, Mit_020c, Mit_021, Mit_021pre2, Mit_021pre1, Mit_020b, Mit_020a, Mit_020, Mit_020pre1, Mit_018, Mit_017, Mit_017pre3, Mit_017pre2, Mit_017pre1, V07-05-00, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c, Mit_014b, ConvRejection-10-06-09, Mit_014a, Mit_014, Mit_014pre3, Mit_014pre2, Mit_014pre1, Mit_013d, Mit_013c, Mit_013b, Mit_013a, Mit_013, Mit_013pre1, Mit_012i, Mit_012g, Mit_012f, Mit_012e, Mit_012d, Mit_012c, Mit_012b, Mit_012a, Mit_012, Mit_011a, Mit_011, Mit_010a, Mit_010, Mit_009c, Mit_009b, Mit_009a, Mit_009, Mit_008, Mit_008pre2, Mit_008pre1, HEAD
Branch point for: Mit_025c_branch
Log Message:
Preload lib for compression improvements.

File Contents

# Content
1 /*************************************************************************
2 * Name: rle.c
3 * Author: Marcus Geelnard
4 * Description: RLE coder/decoder implementation.
5 * Reentrant: Yes
6 *
7 * RLE (Run Length Encoding) is the simplest possible lossless compression
8 * method. Nevertheless it serves a purpose, even in state of the art
9 * compression (it is used in JPEG compression, for instance). The basic
10 * principle is to identify sequences of equal bytes, and replace them with
11 * the byte in question and a repetition count (coded in some clever
12 * fashion).
13 *
14 * There are several different ways to do RLE. The particular method
15 * implemented here is a very efficient one. Instead of coding runs for
16 * both repeating and non-repeating sections, a special marker byte is
17 * used to indicate the start of a repeating section. Non-repeating
18 * sections can thus have any length without being interrupted by control
19 * bytes, except for the rare case when the special marker byte appears in
20 * the non-repeating section (which is coded with at most two bytes). For
21 * optimal efficiency, the marker byte is chosen as the least frequent
22 * (perhaps even non-existent) symbol in the input stream.
23 *
24 * Repeating runs can be as long as 32768 bytes. Runs shorter than 129
25 * bytes require three bytes for coding (marker + count + symbol), whereas
26 * runs longer than 128 bytes require four bytes for coding (marker +
27 * counthi|0x80 + countlo + symbol). This is normally a win in compression,
28 * and it's very seldom a loss of compression ratio compared to using a
29 * fixed coding of three bytes (which allows coding a run of 256 bytes in
30 * just three bytes).
31 *
32 * With this scheme, the worst case compression result is
33 * (257/256)*insize + 1.
34 *
35 *-------------------------------------------------------------------------
36 * Note: This code is based on the code found in "codrle2.c" and
37 * "dcodrle2.c" by David Bourgin, as described in "Introduction to the
38 * losslessy compression schemes", 1994. The main differences from Davids
39 * implementation are the addition of long (15-bit) run counts, the removal
40 * of file I/O (this implementation works solely with preallocated memory
41 * buffers), and that the code is now 100% reentrant.
42 *-------------------------------------------------------------------------
43 * Copyright (c) 2003-2006 Marcus Geelnard
44 *
45 * This software is provided 'as-is', without any express or implied
46 * warranty. In no event will the authors be held liable for any damages
47 * arising from the use of this software.
48 *
49 * Permission is granted to anyone to use this software for any purpose,
50 * including commercial applications, and to alter it and redistribute it
51 * freely, subject to the following restrictions:
52 *
53 * 1. The origin of this software must not be misrepresented; you must not
54 * claim that you wrote the original software. If you use this software
55 * in a product, an acknowledgment in the product documentation would
56 * be appreciated but is not required.
57 *
58 * 2. Altered source versions must be plainly marked as such, and must not
59 * be misrepresented as being the original software.
60 *
61 * 3. This notice may not be removed or altered from any source
62 * distribution.
63 *
64 * Marcus Geelnard
65 * marcus.geelnard at home.se
66 *************************************************************************/
67
68
69
70 /*************************************************************************
71 * INTERNAL FUNCTIONS *
72 *************************************************************************/
73
74
75 /*************************************************************************
76 * _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
77 * times.
78 *************************************************************************/
79
80 static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
81 unsigned char marker, unsigned char symbol, unsigned int count )
82 {
83 unsigned int i, idx;
84
85 idx = *outpos;
86 if( count <= 3 )
87 {
88 if( symbol == marker )
89 {
90 out[ idx ++ ] = marker;
91 out[ idx ++ ] = count-1;
92 }
93 else
94 {
95 for( i = 0; i < count; ++ i )
96 {
97 out[ idx ++ ] = symbol;
98 }
99 }
100 }
101 else
102 {
103 out[ idx ++ ] = marker;
104 -- count;
105 if( count >= 128 )
106 {
107 out[ idx ++ ] = (count >> 8) | 0x80;
108 }
109 out[ idx ++ ] = count & 0xff;
110 out[ idx ++ ] = symbol;
111 }
112 *outpos = idx;
113 }
114
115
116 /*************************************************************************
117 * _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
118 * is the marker symbol, and special care has to be taken for the case
119 * when 'symbol' == 'marker'.
120 *************************************************************************/
121
122 static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
123 unsigned char marker, unsigned char symbol )
124 {
125 unsigned int idx;
126
127 idx = *outpos;
128 if( symbol == marker )
129 {
130 out[ idx ++ ] = marker;
131 out[ idx ++ ] = 0;
132 }
133 else
134 {
135 out[ idx ++ ] = symbol;
136 }
137 *outpos = idx;
138 }
139
140
141
142 /*************************************************************************
143 * PUBLIC FUNCTIONS *
144 *************************************************************************/
145
146
147 /*************************************************************************
148 * RLE_Compress() - Compress a block of data using an RLE coder.
149 * in - Input (uncompressed) buffer.
150 * out - Output (compressed) buffer. This buffer must be 0.4% larger
151 * than the input buffer, plus one byte.
152 * insize - Number of input bytes.
153 * The function returns the size of the compressed data.
154 *************************************************************************/
155
156 int RLE_Compress( unsigned char *in, unsigned char *out,
157 unsigned int insize )
158 {
159 unsigned char byte1, byte2, marker;
160 unsigned int inpos, outpos, count, i, histogram[ 256 ];
161
162 /* Do we have anything to compress? */
163 if( insize < 1 )
164 {
165 return 0;
166 }
167
168 /* Create histogram */
169 for( i = 0; i < 256; ++ i )
170 {
171 histogram[ i ] = 0;
172 }
173 for( i = 0; i < insize; ++ i )
174 {
175 ++ histogram[ in[ i ] ];
176 }
177
178 /* Find the least common byte, and use it as the repetition marker */
179 marker = 0;
180 for( i = 1; i < 256; ++ i )
181 {
182 if( histogram[ i ] < histogram[ marker ] )
183 {
184 marker = i;
185 }
186 }
187
188 /* Remember the repetition marker for the decoder */
189 out[ 0 ] = marker;
190 outpos = 1;
191
192 /* Start of compression */
193 byte1 = in[ 0 ];
194 inpos = 1;
195 count = 1;
196
197 /* Are there at least two bytes? */
198 if( insize >= 2 )
199 {
200 byte2 = in[ inpos ++ ];
201 count = 2;
202
203 /* Main compression loop */
204 do
205 {
206 if( byte1 == byte2 )
207 {
208 /* Do we meet only a sequence of identical bytes? */
209 while( (inpos < insize) && (byte1 == byte2) &&
210 (count < 32768) )
211 {
212 byte2 = in[ inpos ++ ];
213 ++ count;
214 }
215 if( byte1 == byte2 )
216 {
217 _RLE_WriteRep( out, &outpos, marker, byte1, count );
218 if( inpos < insize )
219 {
220 byte1 = in[ inpos ++ ];
221 count = 1;
222 }
223 else
224 {
225 count = 0;
226 }
227 }
228 else
229 {
230 _RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
231 byte1 = byte2;
232 count = 1;
233 }
234 }
235 else
236 {
237 /* No, then don't handle the last byte */
238 _RLE_WriteNonRep( out, &outpos, marker, byte1 );
239 byte1 = byte2;
240 count = 1;
241 }
242 if( inpos < insize )
243 {
244 byte2 = in[ inpos ++ ];
245 count = 2;
246 }
247 }
248 while( (inpos < insize) || (count >= 2) );
249 }
250
251 /* One byte left? */
252 if( count == 1 )
253 {
254 _RLE_WriteNonRep( out, &outpos, marker, byte1 );
255 }
256
257 return outpos;
258 }
259
260
261 /*************************************************************************
262 * RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
263 * in - Input (compressed) buffer.
264 * out - Output (uncompressed) buffer. This buffer must be large
265 * enough to hold the uncompressed data.
266 * insize - Number of input bytes.
267 *************************************************************************/
268
269 void RLE_Uncompress( unsigned char *in, unsigned char *out,
270 unsigned int insize )
271 {
272 unsigned char marker, symbol;
273 unsigned int i, inpos, outpos, count;
274
275 /* Do we have anything to uncompress? */
276 if( insize < 1 )
277 {
278 return;
279 }
280
281 /* Get marker symbol from input stream */
282 inpos = 0;
283 marker = in[ inpos ++ ];
284
285 /* Main decompression loop */
286 outpos = 0;
287 do
288 {
289 symbol = in[ inpos ++ ];
290 if( symbol == marker )
291 {
292 /* We had a marker byte */
293 count = in[ inpos ++ ];
294 if( count <= 2 )
295 {
296 /* Counts 0, 1 and 2 are used for marker byte repetition
297 only */
298 for( i = 0; i <= count; ++ i )
299 {
300 out[ outpos ++ ] = marker;
301 }
302 }
303 else
304 {
305 if( count & 0x80 )
306 {
307 count = ((count & 0x7f) << 8) + in[ inpos ++ ];
308 }
309 symbol = in[ inpos ++ ];
310 for( i = 0; i <= count; ++ i )
311 {
312 out[ outpos ++ ] = symbol;
313 }
314 }
315 }
316 else
317 {
318 /* No marker, plain copy */
319 out[ outpos ++ ] = symbol;
320 }
321 }
322 while( inpos < insize );
323 }