OptIO/src/rle.c

/*************************************************************************
* Name:        rle.c
* Author:      Marcus Geelnard
* Description: RLE coder/decoder implementation.
* Reentrant:   Yes
*
* RLE (Run Length Encoding) is the simplest possible lossless compression
* method. Nevertheless it serves a purpose, even in state of the art
* compression (it is used in JPEG compression, for instance). The basic
* principle is to identify sequences of equal bytes, and replace them with
* the byte in question and a repetition count (coded in some clever
* fashion).
*
* There are several different ways to do RLE. The particular method
* implemented here is a very efficient one. Instead of coding runs for
* both repeating and non-repeating sections, a special marker byte is
* used to indicate the start of a repeating section. Non-repeating
* sections can thus have any length without being interrupted by control
* bytes, except for the rare case when the special marker byte appears in
* the non-repeating section (which is coded with at most two bytes). For
* optimal efficiency, the marker byte is chosen as the least frequent
* (perhaps even non-existent) symbol in the input stream.
*
* Repeating runs can be as long as 32768 bytes. Runs shorter than 129
* bytes require three bytes for coding (marker + count + symbol), whereas
* runs longer than 128 bytes require four bytes for coding (marker +
* counthi|0x80 + countlo + symbol). This is normally a win in compression,
* and it's very seldom a loss of compression ratio compared to using a
* fixed coding of three bytes (which allows coding a run of 256 bytes in
* just three bytes).
*
* With this scheme, the worst case compression result is
* (257/256)*insize + 1.
*
*-------------------------------------------------------------------------
* Note: This code is based on the code found in "codrle2.c" and
* "dcodrle2.c" by David Bourgin, as described in "Introduction to the
* losslessy compression schemes", 1994. The main differences from David's
* implementation are the addition of long (15-bit) run counts, the removal
* of file I/O (this implementation works solely with preallocated memory
* buffers), and that the code is now 100% reentrant.
*-------------------------------------------------------------------------
* Copyright (c) 2003-2006 Marcus Geelnard
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
*    claim that you wrote the original software. If you use this software
*    in a product, an acknowledgment in the product documentation would
*    be appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not
*    be misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source
*    distribution.
*
* Marcus Geelnard
* marcus.geelnard at home.se
*************************************************************************/


/*************************************************************************
*                           INTERNAL FUNCTIONS                           *
*************************************************************************/


/*************************************************************************
* _RLE_WriteRep() - Emit the encoding of a run of 'count' copies of
* 'symbol' at position *outpos of 'out', then advance *outpos past the
* bytes written.
*
* Three layouts are produced:
*  - count > 3:  marker, [high bits of (count-1) | 0x80 when
*                 (count-1) >= 128], low byte of (count-1), symbol.
*  - count <= 3, symbol != marker:  'symbol' copied 'count' times.
*  - count <= 3, symbol == marker:  marker, (count-1).
*************************************************************************/

static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
    unsigned char marker, unsigned char symbol, unsigned int count )
{
    unsigned int pos = *outpos;
    unsigned int stored;

    if( count > 3 )
    {
        /* The run length is stored biased by one. */
        stored = count - 1;

        out[ pos ++ ] = marker;
        if( stored >= 128 )
        {
            /* Two-byte length: bit 7 of the first byte flags the long
               form. */
            out[ pos ++ ] = (unsigned char)( (stored >> 8) | 0x80 );
        }
        out[ pos ++ ] = (unsigned char)( stored & 0xff );
        out[ pos ++ ] = symbol;
    }
    else if( symbol != marker )
    {
        /* Short run of an ordinary byte: plain copies. */
        while( count > 0 )
        {
            out[ pos ++ ] = symbol;
            -- count;
        }
    }
    else
    {
        /* Short run of the marker byte itself: marker followed by the
           biased count. */
        out[ pos ++ ] = marker;
        out[ pos ++ ] = (unsigned char)( count - 1 );
    }

    *outpos = pos;
}


/*************************************************************************
* _RLE_WriteNonRep() - Emit a single, non-repeated 'symbol' at position
* *outpos of 'out' and advance *outpos past the bytes written.
*
* An ordinary byte is stored as-is (one byte). A byte equal to 'marker'
* is stored as the pair (marker, 0).
*************************************************************************/

static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
    unsigned char marker, unsigned char symbol )
{
    unsigned int pos = *outpos;

    /* The first output byte is always the symbol; when the symbol is the
       marker, a zero byte follows it. */
    out[ pos ++ ] = symbol;
    if( symbol == marker )
    {
        out[ pos ++ ] = 0;
    }

    *outpos = pos;
}


/*************************************************************************
*                            PUBLIC FUNCTIONS                            *
*************************************************************************/


/*************************************************************************
* RLE_Compress() - Run-length encode a block of memory.
*  in     - Source buffer holding the raw bytes.
*  out    - Destination buffer for the encoded stream. It must be 0.4%
*           larger than the input buffer, plus one byte.
*  insize - Number of bytes to read from 'in'.
* Returns the number of bytes written to 'out'; 0 (and nothing written)
* when 'insize' is 0.
*
* Stream layout: one byte holding the chosen marker value, followed by
* the output of _RLE_WriteNonRep() / _RLE_WriteRep() for each run found
* in the input.
*************************************************************************/

int RLE_Compress( unsigned char *in, unsigned char *out,
    unsigned int insize )
{
    unsigned int  freq[ 256 ] = { 0 };
    unsigned int  rd, wr, run, k;
    unsigned char marker, cur;

    /* Nothing to do for an empty input. */
    if( insize == 0 )
    {
        return 0;
    }

    /* Count how often each byte value occurs. */
    for( k = 0; k < insize; ++ k )
    {
        freq[ in[ k ] ] ++;
    }

    /* The rarest byte value becomes the marker; on a tie the lowest
       value wins because only a strictly smaller count replaces it. */
    marker = 0;
    for( k = 1; k < 256; ++ k )
    {
        if( freq[ k ] < freq[ marker ] )
        {
            marker = (unsigned char) k;
        }
    }

    /* The decoder reads the marker from the first output byte. */
    out[ 0 ] = marker;
    wr = 1;

    /* Walk the input one run at a time. A run is the longest stretch of
       identical bytes starting at 'rd', capped at 32768 bytes. */
    rd = 0;
    while( rd < insize )
    {
        cur = in[ rd ];
        run = 1;
        while( (run < 32768) && (run < insize - rd) &&
               (in[ rd + run ] == cur) )
        {
            ++ run;
        }

        if( run == 1 )
        {
            _RLE_WriteNonRep( out, &wr, marker, cur );
        }
        else
        {
            _RLE_WriteRep( out, &wr, marker, cur, run );
        }

        rd += run;
    }

    return wr;
}


/*************************************************************************
* RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
*  in      - Input (compressed) buffer. in[0] is the marker byte; the
*            encoded data follows.
*  out     - Output (uncompressed) buffer. This buffer must be large
*            enough to hold the uncompressed data; no output size is
*            passed in, so writes to 'out' are not bounds checked.
*  insize  - Number of input bytes.
*
* No byte at or beyond in[insize] is ever read. If the stream ends in
* the middle of a marker sequence (missing count, low count byte or
* symbol), decoding stops there and the incomplete sequence produces no
* output. An input consisting of the marker byte alone produces no
* output.
*************************************************************************/

void RLE_Uncompress( unsigned char *in, unsigned char *out,
    unsigned int insize )
{
    unsigned char marker, symbol;
    unsigned int  i, inpos, outpos, count;

    /* Do we have anything to uncompress? */
    if( insize < 1 )
    {
        return;
    }

    /* Get marker symbol from input stream */
    inpos = 0;
    marker = in[ inpos ++ ];

    /* Main decompression loop. The condition is tested before the first
       read so that a marker-only stream (insize == 1) is not overrun. */
    outpos = 0;
    while( inpos < insize )
    {
        symbol = in[ inpos ++ ];
        if( symbol != marker )
        {
            /* No marker, plain copy */
            out[ outpos ++ ] = symbol;
            continue;
        }

        /* We had a marker byte: a count byte must follow */
        if( inpos >= insize )
        {
            break;
        }
        count = in[ inpos ++ ];

        if( count <= 2 )
        {
            /* Counts 0, 1 and 2 are used for marker byte repetition
               only: emit the marker count+1 times */
            for( i = 0; i <= count; ++ i )
            {
                out[ outpos ++ ] = marker;
            }
            continue;
        }

        if( count & 0x80 )
        {
            /* Long form: low 7 bits are the high part of the count, the
               next byte is the low part */
            if( inpos >= insize )
            {
                break;
            }
            count = ((count & 0x7f) << 8) + in[ inpos ++ ];
        }

        /* The repeated symbol follows the count */
        if( inpos >= insize )
        {
            break;
        }
        symbol = in[ inpos ++ ];

        /* The stored count is biased by one, hence '<=' */
        for( i = 0; i <= count; ++ i )
        {
            out[ outpos ++ ] = symbol;
        }
    }
}
Revision:	1.1
Committed:	Tue Feb 24 11:56:44 2009 UTC (16 years, 2 months ago) by loizides
Content type:	text/plain
Branch:	MAIN
CVS Tags:	Mit_032, Mit_031, Mit_025c_branch2, Mit_025c_branch1, Mit_030, Mit_029c, Mit_030_pre1, Mit_029a, Mit_029, Mit_029_pre1, Mit_028a, Mit_025c_branch0, Mit_028, Mit_027a, Mit_027, Mit_026, Mit_025e, Mit_025d, Mit_025c, Mit_025b, Mit_025a, Mit_025, Mit_025pre2, Mit_024b, Mit_025pre1, Mit_024a, Mit_024, Mit_023, Mit_022a, Mit_022, Mit_020d, TMit_020d, Mit_020c, Mit_021, Mit_021pre2, Mit_021pre1, Mit_020b, Mit_020a, Mit_020, Mit_020pre1, Mit_018, Mit_017, Mit_017pre3, Mit_017pre2, Mit_017pre1, V07-05-00, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c, Mit_014b, ConvRejection-10-06-09, Mit_014a, Mit_014, Mit_014pre3, Mit_014pre2, Mit_014pre1, Mit_013d, Mit_013c, Mit_013b, Mit_013a, Mit_013, Mit_013pre1, Mit_012i, Mit_012g, Mit_012f, Mit_012e, Mit_012d, Mit_012c, Mit_012b, Mit_012a, Mit_012, Mit_011a, Mit_011, Mit_010a, Mit_010, Mit_009c, Mit_009b, Mit_009a, Mit_009, Mit_008, Mit_008pre2, Mit_008pre1, HEAD
Branch point for:	Mit_025c_branch
Log Message:	Preload lib for compression improvements.
#	Content
1	/*************************************************************************
2	* Name: rle.c
3	* Author: Marcus Geelnard
4	* Description: RLE coder/decoder implementation.
5	* Reentrant: Yes
6	*
7	* RLE (Run Length Encoding) is the simplest possible lossless compression
8	* method. Nevertheless it serves a purpose, even in state of the art
9	* compression (it is used in JPEG compression, for instance). The basic
10	* principle is to identify sequences of equal bytes, and replace them with
11	* the byte in question and a repetition count (coded in some clever
12	* fashion).
13	*
14	* There are several different ways to do RLE. The particular method
15	* implemented here is a very efficient one. Instead of coding runs for
16	* both repeating and non-repeating sections, a special marker byte is
17	* used to indicate the start of a repeating section. Non-repeating
18	* sections can thus have any length without being interrupted by control
19	* bytes, except for the rare case when the special marker byte appears in
20	* the non-repeating section (which is coded with at most two bytes). For
21	* optimal efficiency, the marker byte is chosen as the least frequent
22	* (perhaps even non-existent) symbol in the input stream.
23	*
24	* Repeating runs can be as long as 32768 bytes. Runs shorter than 129
25	* bytes require three bytes for coding (marker + count + symbol), whereas
26	* runs longer than 128 bytes require four bytes for coding (marker +
27	* counthi|0x80 + countlo + symbol). This is normally a win in compression,
28	* and it's very seldom a loss of compression ratio compared to using a
29	* fixed coding of three bytes (which allows coding a run of 256 bytes in
30	* just three bytes).
31	*
32	* With this scheme, the worst case compression result is
33	* (257/256)*insize + 1.
34	*
35	*-------------------------------------------------------------------------
36	* Note: This code is based on the code found in "codrle2.c" and
37	* "dcodrle2.c" by David Bourgin, as described in "Introduction to the
38	* losslessy compression schemes", 1994. The main differences from David's
39	* implementation are the addition of long (15-bit) run counts, the removal
40	* of file I/O (this implementation works solely with preallocated memory
41	* buffers), and that the code is now 100% reentrant.
42	*-------------------------------------------------------------------------
43	* Copyright (c) 2003-2006 Marcus Geelnard
44	*
45	* This software is provided 'as-is', without any express or implied
46	* warranty. In no event will the authors be held liable for any damages
47	* arising from the use of this software.
48	*
49	* Permission is granted to anyone to use this software for any purpose,
50	* including commercial applications, and to alter it and redistribute it
51	* freely, subject to the following restrictions:
52	*
53	* 1. The origin of this software must not be misrepresented; you must not
54	* claim that you wrote the original software. If you use this software
55	* in a product, an acknowledgment in the product documentation would
56	* be appreciated but is not required.
57	*
58	* 2. Altered source versions must be plainly marked as such, and must not
59	* be misrepresented as being the original software.
60	*
61	* 3. This notice may not be removed or altered from any source
62	* distribution.
63	*
64	* Marcus Geelnard
65	* marcus.geelnard at home.se
66	*************************************************************************/
67
68
69
70	/*************************************************************************
71	* INTERNAL FUNCTIONS *
72	*************************************************************************/
73
74
75	/*************************************************************************
76	* _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
77	* times.
78	*************************************************************************/
79
80	static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
81	unsigned char marker, unsigned char symbol, unsigned int count )
82	{
83	unsigned int i, idx;
84
85	idx = *outpos;
86	if( count <= 3 )
87	{
88	if( symbol == marker )
89	{
90	out[ idx ++ ] = marker;
91	out[ idx ++ ] = count-1;
92	}
93	else
94	{
95	for( i = 0; i < count; ++ i )
96	{
97	out[ idx ++ ] = symbol;
98	}
99	}
100	}
101	else
102	{
103	out[ idx ++ ] = marker;
104	-- count;
105	if( count >= 128 )
106	{
107	out[ idx ++ ] = (count >> 8) | 0x80;
108	}
109	out[ idx ++ ] = count & 0xff;
110	out[ idx ++ ] = symbol;
111	}
112	*outpos = idx;
113	}
114
115
116	/*************************************************************************
117	* _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
118	* is the marker symbol, and special care has to be taken for the case
119	* when 'symbol' == 'marker'.
120	*************************************************************************/
121
122	static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
123	unsigned char marker, unsigned char symbol )
124	{
125	unsigned int idx;
126
127	idx = *outpos;
128	if( symbol == marker )
129	{
130	out[ idx ++ ] = marker;
131	out[ idx ++ ] = 0;
132	}
133	else
134	{
135	out[ idx ++ ] = symbol;
136	}
137	*outpos = idx;
138	}
139
140
141
142	/*************************************************************************
143	* PUBLIC FUNCTIONS *
144	*************************************************************************/
145
146
147	/*************************************************************************
148	* RLE_Compress() - Compress a block of data using an RLE coder.
149	* in - Input (uncompressed) buffer.
150	* out - Output (compressed) buffer. This buffer must be 0.4% larger
151	* than the input buffer, plus one byte.
152	* insize - Number of input bytes.
153	* The function returns the size of the compressed data.
154	*************************************************************************/
155
156	int RLE_Compress( unsigned char *in, unsigned char *out,
157	unsigned int insize )
158	{
159	unsigned char byte1, byte2, marker;
160	unsigned int inpos, outpos, count, i, histogram[ 256 ];
161
162	/* Do we have anything to compress? */
163	if( insize < 1 )
164	{
165	return 0;
166	}
167
168	/* Create histogram */
169	for( i = 0; i < 256; ++ i )
170	{
171	histogram[ i ] = 0;
172	}
173	for( i = 0; i < insize; ++ i )
174	{
175	++ histogram[ in[ i ] ];
176	}
177
178	/* Find the least common byte, and use it as the repetition marker */
179	marker = 0;
180	for( i = 1; i < 256; ++ i )
181	{
182	if( histogram[ i ] < histogram[ marker ] )
183	{
184	marker = i;
185	}
186	}
187
188	/* Remember the repetition marker for the decoder */
189	out[ 0 ] = marker;
190	outpos = 1;
191
192	/* Start of compression */
193	byte1 = in[ 0 ];
194	inpos = 1;
195	count = 1;
196
197	/* Are there at least two bytes? */
198	if( insize >= 2 )
199	{
200	byte2 = in[ inpos ++ ];
201	count = 2;
202
203	/* Main compression loop */
204	do
205	{
206	if( byte1 == byte2 )
207	{
208	/* Do we meet only a sequence of identical bytes? */
209	while( (inpos < insize) && (byte1 == byte2) &&
210	(count < 32768) )
211	{
212	byte2 = in[ inpos ++ ];
213	++ count;
214	}
215	if( byte1 == byte2 )
216	{
217	_RLE_WriteRep( out, &outpos, marker, byte1, count );
218	if( inpos < insize )
219	{
220	byte1 = in[ inpos ++ ];
221	count = 1;
222	}
223	else
224	{
225	count = 0;
226	}
227	}
228	else
229	{
230	_RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
231	byte1 = byte2;
232	count = 1;
233	}
234	}
235	else
236	{
237	/* No, then don't handle the last byte */
238	_RLE_WriteNonRep( out, &outpos, marker, byte1 );
239	byte1 = byte2;
240	count = 1;
241	}
242	if( inpos < insize )
243	{
244	byte2 = in[ inpos ++ ];
245	count = 2;
246	}
247	}
248	while( (inpos < insize) || (count >= 2) );
249	}
250
251	/* One byte left? */
252	if( count == 1 )
253	{
254	_RLE_WriteNonRep( out, &outpos, marker, byte1 );
255	}
256
257	return outpos;
258	}
259
260
261	/*************************************************************************
262	* RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
263	* in - Input (compressed) buffer.
264	* out - Output (uncompressed) buffer. This buffer must be large
265	* enough to hold the uncompressed data.
266	* insize - Number of input bytes.
267	*************************************************************************/
268
269	void RLE_Uncompress( unsigned char *in, unsigned char *out,
270	unsigned int insize )
271	{
272	unsigned char marker, symbol;
273	unsigned int i, inpos, outpos, count;
274
275	/* Do we have anything to uncompress? */
276	if( insize < 1 )
277	{
278	return;
279	}
280
281	/* Get marker symbol from input stream */
282	inpos = 0;
283	marker = in[ inpos ++ ];
284
285	/* Main decompression loop */
286	outpos = 0;
287	do
288	{
289	symbol = in[ inpos ++ ];
290	if( symbol == marker )
291	{
292	/* We had a marker byte */
293	count = in[ inpos ++ ];
294	if( count <= 2 )
295	{
296	/* Counts 0, 1 and 2 are used for marker byte repetition
297	only */
298	for( i = 0; i <= count; ++ i )
299	{
300	out[ outpos ++ ] = marker;
301	}
302	}
303	else
304	{
305	if( count & 0x80 )
306	{
307	count = ((count & 0x7f) << 8) + in[ inpos ++ ];
308	}
309	symbol = in[ inpos ++ ];
310	for( i = 0; i <= count; ++ i )
311	{
312	out[ outpos ++ ] = symbol;
313	}
314	}
315	}
316	else
317	{
318	/* No marker, plain copy */
319	out[ outpos ++ ] = symbol;
320	}
321	}
322	while( inpos < insize );
323	}