ViewVC Help
View File | Revision Log | Show Annotations | Root Listing
root/cvsroot/UserCode/MitCommon/OptIO/src/rle.c
Revision: 1.1
Committed: Tue Feb 24 11:56:44 2009 UTC (16 years, 2 months ago) by loizides
Content type: text/plain
Branch: MAIN
CVS Tags: Mit_032, Mit_031, Mit_025c_branch2, Mit_025c_branch1, Mit_030, Mit_029c, Mit_030_pre1, Mit_029a, Mit_029, Mit_029_pre1, Mit_028a, Mit_025c_branch0, Mit_028, Mit_027a, Mit_027, Mit_026, Mit_025e, Mit_025d, Mit_025c, Mit_025b, Mit_025a, Mit_025, Mit_025pre2, Mit_024b, Mit_025pre1, Mit_024a, Mit_024, Mit_023, Mit_022a, Mit_022, Mit_020d, TMit_020d, Mit_020c, Mit_021, Mit_021pre2, Mit_021pre1, Mit_020b, Mit_020a, Mit_020, Mit_020pre1, Mit_018, Mit_017, Mit_017pre3, Mit_017pre2, Mit_017pre1, V07-05-00, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c, Mit_014b, ConvRejection-10-06-09, Mit_014a, Mit_014, Mit_014pre3, Mit_014pre2, Mit_014pre1, Mit_013d, Mit_013c, Mit_013b, Mit_013a, Mit_013, Mit_013pre1, Mit_012i, Mit_012g, Mit_012f, Mit_012e, Mit_012d, Mit_012c, Mit_012b, Mit_012a, Mit_012, Mit_011a, Mit_011, Mit_010a, Mit_010, Mit_009c, Mit_009b, Mit_009a, Mit_009, Mit_008, Mit_008pre2, Mit_008pre1, HEAD
Branch point for: Mit_025c_branch
Log Message:
Preload lib for compression improvements.

File Contents

# User Rev Content
1 loizides 1.1 /*************************************************************************
2     * Name: rle.c
3     * Author: Marcus Geelnard
4     * Description: RLE coder/decoder implementation.
5     * Reentrant: Yes
6     *
7     * RLE (Run Length Encoding) is the simplest possible lossless compression
8     * method. Nevertheless it serves a purpose, even in state of the art
9     * compression (it is used in JPEG compression, for instance). The basic
10     * principle is to identify sequences of equal bytes, and replace them with
11     * the byte in question and a repetition count (coded in some clever
12     * fashion).
13     *
14     * There are several different ways to do RLE. The particular method
15     * implemented here is a very efficient one. Instead of coding runs for
16     * both repeating and non-repeating sections, a special marker byte is
17     * used to indicate the start of a repeating section. Non-repeating
18     * sections can thus have any length without being interrupted by control
19     * bytes, except for the rare case when the special marker byte appears in
20     * the non-repeating section (which is coded with at most two bytes). For
21     * optimal efficiency, the marker byte is chosen as the least frequent
22     * (perhaps even non-existent) symbol in the input stream.
23     *
24     * Repeating runs can be as long as 32768 bytes. Runs shorter than 129
25     * bytes require three bytes for coding (marker + count + symbol), whereas
26     * runs longer than 128 bytes require four bytes for coding (marker +
27     * counthi|0x80 + countlo + symbol). This is normally a win in compression,
28     * and it's very seldom a loss of compression ratio compared to using a
29     * fixed coding of three bytes (which allows coding a run of 256 bytes in
30     * just three bytes).
31     *
32     * With this scheme, the worst case compression result is
33     * (257/256)*insize + 1.
34     *
35     *-------------------------------------------------------------------------
36     * Note: This code is based on the code found in "codrle2.c" and
37     * "dcodrle2.c" by David Bourgin, as described in "Introduction to the
38     * losslessy compression schemes", 1994. The main differences from Davids
39     * implementation are the addition of long (15-bit) run counts, the removal
40     * of file I/O (this implementation works solely with preallocated memory
41     * buffers), and that the code is now 100% reentrant.
42     *-------------------------------------------------------------------------
43     * Copyright (c) 2003-2006 Marcus Geelnard
44     *
45     * This software is provided 'as-is', without any express or implied
46     * warranty. In no event will the authors be held liable for any damages
47     * arising from the use of this software.
48     *
49     * Permission is granted to anyone to use this software for any purpose,
50     * including commercial applications, and to alter it and redistribute it
51     * freely, subject to the following restrictions:
52     *
53     * 1. The origin of this software must not be misrepresented; you must not
54     * claim that you wrote the original software. If you use this software
55     * in a product, an acknowledgment in the product documentation would
56     * be appreciated but is not required.
57     *
58     * 2. Altered source versions must be plainly marked as such, and must not
59     * be misrepresented as being the original software.
60     *
61     * 3. This notice may not be removed or altered from any source
62     * distribution.
63     *
64     * Marcus Geelnard
65     * marcus.geelnard at home.se
66     *************************************************************************/
67    
68    
69    
70     /*************************************************************************
71     * INTERNAL FUNCTIONS *
72     *************************************************************************/
73    
74    
75     /*************************************************************************
76     * _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
77     * times.
78     *************************************************************************/
79    
80     static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
81     unsigned char marker, unsigned char symbol, unsigned int count )
82     {
83     unsigned int i, idx;
84    
85     idx = *outpos;
86     if( count <= 3 )
87     {
88     if( symbol == marker )
89     {
90     out[ idx ++ ] = marker;
91     out[ idx ++ ] = count-1;
92     }
93     else
94     {
95     for( i = 0; i < count; ++ i )
96     {
97     out[ idx ++ ] = symbol;
98     }
99     }
100     }
101     else
102     {
103     out[ idx ++ ] = marker;
104     -- count;
105     if( count >= 128 )
106     {
107     out[ idx ++ ] = (count >> 8) | 0x80;
108     }
109     out[ idx ++ ] = count & 0xff;
110     out[ idx ++ ] = symbol;
111     }
112     *outpos = idx;
113     }
114    
115    
116     /*************************************************************************
117     * _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
118     * is the marker symbol, and special care has to be taken for the case
119     * when 'symbol' == 'marker'.
120     *************************************************************************/
121    
122     static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
123     unsigned char marker, unsigned char symbol )
124     {
125     unsigned int idx;
126    
127     idx = *outpos;
128     if( symbol == marker )
129     {
130     out[ idx ++ ] = marker;
131     out[ idx ++ ] = 0;
132     }
133     else
134     {
135     out[ idx ++ ] = symbol;
136     }
137     *outpos = idx;
138     }
139    
140    
141    
142     /*************************************************************************
143     * PUBLIC FUNCTIONS *
144     *************************************************************************/
145    
146    
147     /*************************************************************************
148     * RLE_Compress() - Compress a block of data using an RLE coder.
149     * in - Input (uncompressed) buffer.
150     * out - Output (compressed) buffer. This buffer must be 0.4% larger
151     * than the input buffer, plus one byte.
152     * insize - Number of input bytes.
153     * The function returns the size of the compressed data.
154     *************************************************************************/
155    
156     int RLE_Compress( unsigned char *in, unsigned char *out,
157     unsigned int insize )
158     {
159     unsigned char byte1, byte2, marker;
160     unsigned int inpos, outpos, count, i, histogram[ 256 ];
161    
162     /* Do we have anything to compress? */
163     if( insize < 1 )
164     {
165     return 0;
166     }
167    
168     /* Create histogram */
169     for( i = 0; i < 256; ++ i )
170     {
171     histogram[ i ] = 0;
172     }
173     for( i = 0; i < insize; ++ i )
174     {
175     ++ histogram[ in[ i ] ];
176     }
177    
178     /* Find the least common byte, and use it as the repetition marker */
179     marker = 0;
180     for( i = 1; i < 256; ++ i )
181     {
182     if( histogram[ i ] < histogram[ marker ] )
183     {
184     marker = i;
185     }
186     }
187    
188     /* Remember the repetition marker for the decoder */
189     out[ 0 ] = marker;
190     outpos = 1;
191    
192     /* Start of compression */
193     byte1 = in[ 0 ];
194     inpos = 1;
195     count = 1;
196    
197     /* Are there at least two bytes? */
198     if( insize >= 2 )
199     {
200     byte2 = in[ inpos ++ ];
201     count = 2;
202    
203     /* Main compression loop */
204     do
205     {
206     if( byte1 == byte2 )
207     {
208     /* Do we meet only a sequence of identical bytes? */
209     while( (inpos < insize) && (byte1 == byte2) &&
210     (count < 32768) )
211     {
212     byte2 = in[ inpos ++ ];
213     ++ count;
214     }
215     if( byte1 == byte2 )
216     {
217     _RLE_WriteRep( out, &outpos, marker, byte1, count );
218     if( inpos < insize )
219     {
220     byte1 = in[ inpos ++ ];
221     count = 1;
222     }
223     else
224     {
225     count = 0;
226     }
227     }
228     else
229     {
230     _RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
231     byte1 = byte2;
232     count = 1;
233     }
234     }
235     else
236     {
237     /* No, then don't handle the last byte */
238     _RLE_WriteNonRep( out, &outpos, marker, byte1 );
239     byte1 = byte2;
240     count = 1;
241     }
242     if( inpos < insize )
243     {
244     byte2 = in[ inpos ++ ];
245     count = 2;
246     }
247     }
248     while( (inpos < insize) || (count >= 2) );
249     }
250    
251     /* One byte left? */
252     if( count == 1 )
253     {
254     _RLE_WriteNonRep( out, &outpos, marker, byte1 );
255     }
256    
257     return outpos;
258     }
259    
260    
261     /*************************************************************************
262     * RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
263     * in - Input (compressed) buffer.
264     * out - Output (uncompressed) buffer. This buffer must be large
265     * enough to hold the uncompressed data.
266     * insize - Number of input bytes.
267     *************************************************************************/
268    
269     void RLE_Uncompress( unsigned char *in, unsigned char *out,
270     unsigned int insize )
271     {
272     unsigned char marker, symbol;
273     unsigned int i, inpos, outpos, count;
274    
275     /* Do we have anything to uncompress? */
276     if( insize < 1 )
277     {
278     return;
279     }
280    
281     /* Get marker symbol from input stream */
282     inpos = 0;
283     marker = in[ inpos ++ ];
284    
285     /* Main decompression loop */
286     outpos = 0;
287     do
288     {
289     symbol = in[ inpos ++ ];
290     if( symbol == marker )
291     {
292     /* We had a marker byte */
293     count = in[ inpos ++ ];
294     if( count <= 2 )
295     {
296     /* Counts 0, 1 and 2 are used for marker byte repetition
297     only */
298     for( i = 0; i <= count; ++ i )
299     {
300     out[ outpos ++ ] = marker;
301     }
302     }
303     else
304     {
305     if( count & 0x80 )
306     {
307     count = ((count & 0x7f) << 8) + in[ inpos ++ ];
308     }
309     symbol = in[ inpos ++ ];
310     for( i = 0; i <= count; ++ i )
311     {
312     out[ outpos ++ ] = symbol;
313     }
314     }
315     }
316     else
317     {
318     /* No marker, plain copy */
319     out[ outpos ++ ] = symbol;
320     }
321     }
322     while( inpos < insize );
323     }