OptIO/src/rle.c

/*************************************************************************
* Name:        rle.c
* Author:      Marcus Geelnard
* Description: RLE coder/decoder implementation.
* Reentrant:   Yes
*
* RLE (Run Length Encoding) is the simplest possible lossless compression
* method. Nevertheless it serves a purpose, even in state of the art
* compression (it is used in JPEG compression, for instance). The basic
* principle is to identify sequences of equal bytes, and replace them with
* the byte in question and a repetition count (coded in some clever
* fashion).
*
* There are several different ways to do RLE. The particular method
* implemented here is a very efficient one. Instead of coding runs for
* both repeating and non-repeating sections, a special marker byte is
* used to indicate the start of a repeating section. Non-repeating
* sections can thus have any length without being interrupted by control
* bytes, except for the rare case when the special marker byte appears in
* the non-repeating section (which is coded with at most two bytes). For
* optimal efficiency, the marker byte is chosen as the least frequent
* (perhaps even non-existent) symbol in the input stream.
*
* Repeating runs can be as long as 32768 bytes. Runs shorter than 129
* bytes require three bytes for coding (marker + count + symbol), whereas
* runs longer than 128 bytes require four bytes for coding (marker +
* counthi|0x80 + countlo + symbol). This is normally a win in compression,
* and it's very seldom a loss of compression ratio compared to using a
* fixed coding of three bytes (which allows coding a run of 256 bytes in
* just three bytes).
*
* With this scheme, the worst case compression result is
* (257/256)*insize + 1.
*
*-------------------------------------------------------------------------
* Note: This code is based on the code found in "codrle2.c" and
* "dcodrle2.c" by David Bourgin, as described in "Introduction to the
* losslessy compression schemes", 1994. The main differences from Davids
* implementation are the addition of long (15-bit) run counts, the removal
* of file I/O (this implementation works solely with preallocated memory
* buffers), and that the code is now 100% reentrant.
*-------------------------------------------------------------------------
* Copyright (c) 2003-2006 Marcus Geelnard
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
*    claim that you wrote the original software. If you use this software
*    in a product, an acknowledgment in the product documentation would
*    be appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not
*    be misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source
*    distribution.
*
* Marcus Geelnard
* marcus.geelnard at home.se
*************************************************************************/


/*************************************************************************
*                           INTERNAL FUNCTIONS                           *
*************************************************************************/


/*************************************************************************
* _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
* times.
*************************************************************************/

static void _RLE_WriteRep( unsigned char *out, unsigned int *outpos,
    unsigned char marker, unsigned char symbol, unsigned int count )
{
    unsigned int i, idx;

    idx = *outpos;
    if( count <= 3 )
    {
        if( symbol == marker )
        {
            out[ idx ++ ] = marker;
            out[ idx ++ ] = count-1;
        }
        else
        {
            for( i = 0; i < count; ++ i )
            {
                out[ idx ++ ] = symbol;
            }
        }
    }
    else
    {
        out[ idx ++ ] = marker;
        -- count;
        if( count >= 128 )
        {
            out[ idx ++ ] = (count >> 8) | 0x80;
        }
        out[ idx ++ ] = count & 0xff;
        out[ idx ++ ] = symbol;
    }
    *outpos = idx;
}


/*************************************************************************
* _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
* is the marker symbol, and special care has to be taken for the case
* when 'symbol' == 'marker'.
*************************************************************************/

static void _RLE_WriteNonRep( unsigned char *out, unsigned int *outpos,
    unsigned char marker, unsigned char symbol )
{
    unsigned int idx;

    idx = *outpos;
    if( symbol == marker )
    {
        out[ idx ++ ] = marker;
        out[ idx ++ ] = 0;
    }
    else
    {
        out[ idx ++ ] = symbol;
    }
    *outpos = idx;
}


/*************************************************************************
*                            PUBLIC FUNCTIONS                            *
*************************************************************************/


/*************************************************************************
* RLE_Compress() - Compress a block of data using an RLE coder.
*  in     - Input (uncompressed) buffer.
*  out    - Output (compressed) buffer. This buffer must be 0.4% larger
*           than the input buffer, plus one byte.
*  insize - Number of input bytes.
* The function returns the size of the compressed data.
*************************************************************************/

int RLE_Compress( unsigned char *in, unsigned char *out,
    unsigned int insize )
{
    unsigned char byte1, byte2, marker;
    unsigned int  inpos, outpos, count, i, histogram[ 256 ];

    /* Do we have anything to compress? */
    if( insize < 1 )
    {
        return 0;
    }

    /* Create histogram */
    for( i = 0; i < 256; ++ i )
    {
        histogram[ i ] = 0;
    }
    for( i = 0; i < insize; ++ i )
    {
        ++ histogram[ in[ i ] ];
    }

    /* Find the least common byte, and use it as the repetition marker */
    marker = 0;
    for( i = 1; i < 256; ++ i )
    {
        if( histogram[ i ] < histogram[ marker ] )
        {
            marker = i;
        }
    }

    /* Remember the repetition marker for the decoder */
    out[ 0 ] = marker;
    outpos = 1;

    /* Start of compression */
    byte1 = in[ 0 ];
    inpos = 1;
    count = 1;

    /* Are there at least two bytes? */
    if( insize >= 2 )
    {
        byte2 = in[ inpos ++ ];
        count = 2;

        /* Main compression loop */
        do
        {
            if( byte1 == byte2 )
            {
                /* Do we meet only a sequence of identical bytes? */
                while( (inpos < insize) && (byte1 == byte2) &&
                       (count < 32768) )
                {
                    byte2 = in[ inpos ++ ];
                    ++ count;
                }
                if( byte1 == byte2 )
                {
                    _RLE_WriteRep( out, &outpos, marker, byte1, count );
                    if( inpos < insize )
                    {
                        byte1 = in[ inpos ++ ];
                        count = 1;
                    }
                    else
                    {
                        count = 0;
                    }
                }
                else
                {
                    _RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
                    byte1 = byte2;
                    count = 1;
                }
            }
            else
            {
                /* No, then don't handle the last byte */
                _RLE_WriteNonRep( out, &outpos, marker, byte1 );
                byte1 = byte2;
                count = 1;
            }
            if( inpos < insize )
            {
                byte2 = in[ inpos ++ ];
                count = 2;
            }
        }
        while( (inpos < insize) || (count >= 2) );
    }

    /* One byte left? */
    if( count == 1 )
    {
        _RLE_WriteNonRep( out, &outpos, marker, byte1 );
    }

    return outpos;
}


/*************************************************************************
* RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
*  in      - Input (compressed) buffer.
*  out     - Output (uncompressed) buffer. This buffer must be large
*            enough to hold the uncompressed data.
*  insize  - Number of input bytes.
*************************************************************************/

void RLE_Uncompress( unsigned char *in, unsigned char *out,
    unsigned int insize )
{
    unsigned char marker, symbol;
    unsigned int  i, inpos, outpos, count;

    /* Do we have anything to uncompress? */
    if( insize < 1 )
    {
        return;
    }

    /* Get marker symbol from input stream */
    inpos = 0;
    marker = in[ inpos ++ ];

    /* Main decompression loop */
    outpos = 0;
    do
    {
        symbol = in[ inpos ++ ];
        if( symbol == marker )
        {
            /* We had a marker byte */
            count = in[ inpos ++ ];
            if( count <= 2 )
            {
                /* Counts 0, 1 and 2 are used for marker byte repetition
                   only */
                for( i = 0; i <= count; ++ i )
                {
                    out[ outpos ++ ] = marker;
                }
            }
            else
            {
                if( count & 0x80 )
                {
                    count = ((count & 0x7f) << 8) + in[ inpos ++ ];
                }
                symbol = in[ inpos ++ ];
                for( i = 0; i <= count; ++ i )
                {
                    out[ outpos ++ ] = symbol;
                }
            }
        }
        else
        {
            /* No marker, plain copy */
            out[ outpos ++ ] = symbol;
        }
    }
    while( inpos < insize );
}
Revision:	1.1
Committed:	Tue Feb 24 11:56:44 2009 UTC (16 years, 2 months ago) by loizides
Content type:	text/plain
Branch:	MAIN
CVS Tags:	Mit_032, Mit_031, Mit_025c_branch2, Mit_025c_branch1, Mit_030, Mit_029c, Mit_030_pre1, Mit_029a, Mit_029, Mit_029_pre1, Mit_028a, Mit_025c_branch0, Mit_028, Mit_027a, Mit_027, Mit_026, Mit_025e, Mit_025d, Mit_025c, Mit_025b, Mit_025a, Mit_025, Mit_025pre2, Mit_024b, Mit_025pre1, Mit_024a, Mit_024, Mit_023, Mit_022a, Mit_022, Mit_020d, TMit_020d, Mit_020c, Mit_021, Mit_021pre2, Mit_021pre1, Mit_020b, Mit_020a, Mit_020, Mit_020pre1, Mit_018, Mit_017, Mit_017pre3, Mit_017pre2, Mit_017pre1, V07-05-00, Mit_016, Mit_015b, Mit_015a, Mit_015, Mit_014e, Mit_014d, Mit_014c, Mit_014b, ConvRejection-10-06-09, Mit_014a, Mit_014, Mit_014pre3, Mit_014pre2, Mit_014pre1, Mit_013d, Mit_013c, Mit_013b, Mit_013a, Mit_013, Mit_013pre1, Mit_012i, Mit_012g, Mit_012f, Mit_012e, Mit_012d, Mit_012c, Mit_012b, Mit_012a, Mit_012, Mit_011a, Mit_011, Mit_010a, Mit_010, Mit_009c, Mit_009b, Mit_009a, Mit_009, Mit_008, Mit_008pre2, Mit_008pre1, HEAD
Branch point for:	Mit_025c_branch
Log Message:	Preload lib for compression improvements.
#	User	Rev	Content
1	loizides	1.1	/*************************************************************************
2			* Name: rle.c
3			* Author: Marcus Geelnard
4			* Description: RLE coder/decoder implementation.
5			* Reentrant: Yes
6			*
7			* RLE (Run Length Encoding) is the simplest possible lossless compression
8			* method. Nevertheless it serves a purpose, even in state of the art
9			* compression (it is used in JPEG compression, for instance). The basic
10			* principle is to identify sequences of equal bytes, and replace them with
11			* the byte in question and a repetition count (coded in some clever
12			* fashion).
13			*
14			* There are several different ways to do RLE. The particular method
15			* implemented here is a very efficient one. Instead of coding runs for
16			* both repeating and non-repeating sections, a special marker byte is
17			* used to indicate the start of a repeating section. Non-repeating
18			* sections can thus have any length without being interrupted by control
19			* bytes, except for the rare case when the special marker byte appears in
20			* the non-repeating section (which is coded with at most two bytes). For
21			* optimal efficiency, the marker byte is chosen as the least frequent
22			* (perhaps even non-existent) symbol in the input stream.
23			*
24			* Repeating runs can be as long as 32768 bytes. Runs shorter than 129
25			* bytes require three bytes for coding (marker + count + symbol), whereas
26			* runs longer than 128 bytes require four bytes for coding (marker +
27			* counthi\|0x80 + countlo + symbol). This is normally a win in compression,
28			* and it's very seldom a loss of compression ratio compared to using a
29			* fixed coding of three bytes (which allows coding a run of 256 bytes in
30			* just three bytes).
31			*
32			* With this scheme, the worst case compression result is
33			* (257/256)*insize + 1.
34			*
35			*-------------------------------------------------------------------------
36			* Note: This code is based on the code found in "codrle2.c" and
37			* "dcodrle2.c" by David Bourgin, as described in "Introduction to the
38			* losslessy compression schemes", 1994. The main differences from Davids
39			* implementation are the addition of long (15-bit) run counts, the removal
40			* of file I/O (this implementation works solely with preallocated memory
41			* buffers), and that the code is now 100% reentrant.
42			*-------------------------------------------------------------------------
43			* Copyright (c) 2003-2006 Marcus Geelnard
44			*
45			* This software is provided 'as-is', without any express or implied
46			* warranty. In no event will the authors be held liable for any damages
47			* arising from the use of this software.
48			*
49			* Permission is granted to anyone to use this software for any purpose,
50			* including commercial applications, and to alter it and redistribute it
51			* freely, subject to the following restrictions:
52			*
53			* 1. The origin of this software must not be misrepresented; you must not
54			* claim that you wrote the original software. If you use this software
55			* in a product, an acknowledgment in the product documentation would
56			* be appreciated but is not required.
57			*
58			* 2. Altered source versions must be plainly marked as such, and must not
59			* be misrepresented as being the original software.
60			*
61			* 3. This notice may not be removed or altered from any source
62			* distribution.
63			*
64			* Marcus Geelnard
65			* marcus.geelnard at home.se
66			*************************************************************************/
67
68
69
70			/*************************************************************************
71			* INTERNAL FUNCTIONS *
72			*************************************************************************/
73
74
75			/*************************************************************************
76			* _RLE_WriteRep() - Encode a repetition of 'symbol' repeated 'count'
77			* times.
78			*************************************************************************/
79
80			static void _RLE_WriteRep( unsigned char out, unsigned int outpos,
81			unsigned char marker, unsigned char symbol, unsigned int count )
82			{
83			unsigned int i, idx;
84
85			idx = *outpos;
86			if( count <= 3 )
87			{
88			if( symbol == marker )
89			{
90			out[ idx ++ ] = marker;
91			out[ idx ++ ] = count-1;
92			}
93			else
94			{
95			for( i = 0; i < count; ++ i )
96			{
97			out[ idx ++ ] = symbol;
98			}
99			}
100			}
101			else
102			{
103			out[ idx ++ ] = marker;
104			-- count;
105			if( count >= 128 )
106			{
107			out[ idx ++ ] = (count >> 8) \| 0x80;
108			}
109			out[ idx ++ ] = count & 0xff;
110			out[ idx ++ ] = symbol;
111			}
112			*outpos = idx;
113			}
114
115
116			/*************************************************************************
117			* _RLE_WriteNonRep() - Encode a non-repeating symbol, 'symbol'. 'marker'
118			* is the marker symbol, and special care has to be taken for the case
119			* when 'symbol' == 'marker'.
120			*************************************************************************/
121
122			static void _RLE_WriteNonRep( unsigned char out, unsigned int outpos,
123			unsigned char marker, unsigned char symbol )
124			{
125			unsigned int idx;
126
127			idx = *outpos;
128			if( symbol == marker )
129			{
130			out[ idx ++ ] = marker;
131			out[ idx ++ ] = 0;
132			}
133			else
134			{
135			out[ idx ++ ] = symbol;
136			}
137			*outpos = idx;
138			}
139
140
141
142			/*************************************************************************
143			* PUBLIC FUNCTIONS *
144			*************************************************************************/
145
146
147			/*************************************************************************
148			* RLE_Compress() - Compress a block of data using an RLE coder.
149			* in - Input (uncompressed) buffer.
150			* out - Output (compressed) buffer. This buffer must be 0.4% larger
151			* than the input buffer, plus one byte.
152			* insize - Number of input bytes.
153			* The function returns the size of the compressed data.
154			*************************************************************************/
155
156			int RLE_Compress( unsigned char in, unsigned char out,
157			unsigned int insize )
158			{
159			unsigned char byte1, byte2, marker;
160			unsigned int inpos, outpos, count, i, histogram[ 256 ];
161
162			/* Do we have anything to compress? */
163			if( insize < 1 )
164			{
165			return 0;
166			}
167
168			/* Create histogram */
169			for( i = 0; i < 256; ++ i )
170			{
171			histogram[ i ] = 0;
172			}
173			for( i = 0; i < insize; ++ i )
174			{
175			++ histogram[ in[ i ] ];
176			}
177
178			/* Find the least common byte, and use it as the repetition marker */
179			marker = 0;
180			for( i = 1; i < 256; ++ i )
181			{
182			if( histogram[ i ] < histogram[ marker ] )
183			{
184			marker = i;
185			}
186			}
187
188			/* Remember the repetition marker for the decoder */
189			out[ 0 ] = marker;
190			outpos = 1;
191
192			/* Start of compression */
193			byte1 = in[ 0 ];
194			inpos = 1;
195			count = 1;
196
197			/* Are there at least two bytes? */
198			if( insize >= 2 )
199			{
200			byte2 = in[ inpos ++ ];
201			count = 2;
202
203			/* Main compression loop */
204			do
205			{
206			if( byte1 == byte2 )
207			{
208			/* Do we meet only a sequence of identical bytes? */
209			while( (inpos < insize) && (byte1 == byte2) &&
210			(count < 32768) )
211			{
212			byte2 = in[ inpos ++ ];
213			++ count;
214			}
215			if( byte1 == byte2 )
216			{
217			_RLE_WriteRep( out, &outpos, marker, byte1, count );
218			if( inpos < insize )
219			{
220			byte1 = in[ inpos ++ ];
221			count = 1;
222			}
223			else
224			{
225			count = 0;
226			}
227			}
228			else
229			{
230			_RLE_WriteRep( out, &outpos, marker, byte1, count-1 );
231			byte1 = byte2;
232			count = 1;
233			}
234			}
235			else
236			{
237			/* No, then don't handle the last byte */
238			_RLE_WriteNonRep( out, &outpos, marker, byte1 );
239			byte1 = byte2;
240			count = 1;
241			}
242			if( inpos < insize )
243			{
244			byte2 = in[ inpos ++ ];
245			count = 2;
246			}
247			}
248			while( (inpos < insize) \|\| (count >= 2) );
249			}
250
251			/* One byte left? */
252			if( count == 1 )
253			{
254			_RLE_WriteNonRep( out, &outpos, marker, byte1 );
255			}
256
257			return outpos;
258			}
259
260
261			/*************************************************************************
262			* RLE_Uncompress() - Uncompress a block of data using an RLE decoder.
263			* in - Input (compressed) buffer.
264			* out - Output (uncompressed) buffer. This buffer must be large
265			* enough to hold the uncompressed data.
266			* insize - Number of input bytes.
267			*************************************************************************/
268
269			void RLE_Uncompress( unsigned char in, unsigned char out,
270			unsigned int insize )
271			{
272			unsigned char marker, symbol;
273			unsigned int i, inpos, outpos, count;
274
275			/* Do we have anything to uncompress? */
276			if( insize < 1 )
277			{
278			return;
279			}
280
281			/* Get marker symbol from input stream */
282			inpos = 0;
283			marker = in[ inpos ++ ];
284
285			/* Main decompression loop */
286			outpos = 0;
287			do
288			{
289			symbol = in[ inpos ++ ];
290			if( symbol == marker )
291			{
292			/* We had a marker byte */
293			count = in[ inpos ++ ];
294			if( count <= 2 )
295			{
296			/* Counts 0, 1 and 2 are used for marker byte repetition
297			only */
298			for( i = 0; i <= count; ++ i )
299			{
300			out[ outpos ++ ] = marker;
301			}
302			}
303			else
304			{
305			if( count & 0x80 )
306			{
307			count = ((count & 0x7f) << 8) + in[ inpos ++ ];
308			}
309			symbol = in[ inpos ++ ];
310			for( i = 0; i <= count; ++ i )
311			{
312			out[ outpos ++ ] = symbol;
313			}
314			}
315			}
316			else
317			{
318			/* No marker, plain copy */
319			out[ outpos ++ ] = symbol;
320			}
321			}
322			while( inpos < insize );
323			}