## Huffman Glitch

6 posts / 0 new
Author
Message

Guys, I have designed a compression routine that uses the Huffman tree.  It appears to work well enough but some characters such as "despite all the ‘predictions’ to the contrary" comes out "despite all the â€˜predictionsâ€™ to the contrary".

```#include "../../applications/compression/huffman.h"

#include <stdlib.h>
#include "stdint.h"
#include "string.h"
#include "stdio.h"

#include "../../arm_rtos/system_malloc.h"
#include "../../arm_rtos/serial_console/c_printf.h"

/** In computer science and information theory, a Huffman code is a particular
*  type of optimal prefix code that is commonly used for lossless data
*  compression. The process of finding or using such a code proceeds by means
*   of Huffman coding, an algorithm developed by David A. Huffman while he
*   was a Sc.D. student at MIT, and published in the 1952 paper "A Method for
*    the Construction of Minimum-Redundancy Codes".[1]
*/

int32_t SubTrees;

static struct Huffman_Node * retrieve_small_leaf1(struct Huffman_Node * node, int32_t OffSet)
{

for (int32_t i = OffSet; ((i != 0) && (node->value == 0)); i--, node--);
return node;
}

static struct Huffman_Node * retrieve_small_leaf2(struct Huffman_Node * node, int32_t OffSet)
{

for (int32_t i = OffSet; ((i != 0) && (node->value == 0)); i--, node--);
return node - 1;
}

struct Huffman_Node * build_huffman_graph(uint8_t * inRaw, uint32_t Data){

struct RawDat histogram[256] = {0};

// clear binary histogram
for(int32_t i = 0; i < 255; i++){

histogram[i].repitions = 0;
histogram[i].dat = 0;
}

// derive binary histogram from raw data
for(int32_t i = 0; i < Data; i++, inRaw++){

histogram[(uint32_t)* inRaw].repitions++;
histogram[(uint32_t)* inRaw].dat = * inRaw;
}

// find the number of active sub trees
for(int32_t g = 0; g != 255; g++){
if(histogram[g].repitions != 0){
SubTrees++;
}
}

struct RawDat swap;
int32_t c, d;

// bubble sort the histogram
for (c = 0 ; c < 255 - 1; c++){
for (d = 0 ; d < 255 - 1; d++){

if ((histogram[d].repitions < histogram[d + 1].repitions)){

memcpy(&swap, &histogram[d], sizeof(struct RawDat));
memcpy(&histogram[d], &histogram[d + 1], sizeof(struct RawDat));
memcpy(&histogram[d + 1], &swap, sizeof(struct RawDat));
}
}
}

// fill in the data for each leaf
struct Huffman_Node * node = rtos_malloc((sizeof(struct Huffman_Node) * SubTrees));

for (int32_t f = 0; f < SubTrees; f++){

node[f].type	= LEAF;
node[f].value   = histogram[f].repitions;
node[f].marker  = histogram[f].dat;
node[f].left    = 0;
node[f].right   = 0;
}

struct Huffman_Node * LeafOne = NULL;
struct Huffman_Node * LeafTwo = NULL;

int32_t OffSet = SubTrees;

while(SubTrees - 1){

// get the two lowest leafs/branchs
LeafOne = retrieve_small_leaf1(&node[OffSet - 1], OffSet);
LeafTwo = retrieve_small_leaf2(&node[OffSet - 1], OffSet);

struct Huffman_Node * leaf2 = rtos_malloc(sizeof(struct Huffman_Node));
memcpy(leaf2, LeafTwo, sizeof(struct Huffman_Node));

// create new branch, this branch is also in contention
LeafTwo->type   = BRANCH;
LeafTwo->value  = LeafOne->value + leaf2->value;
LeafTwo->left   = leaf2;
LeafTwo->right  = LeafOne;
LeafTwo->marker = '#';

// remove from contention
LeafOne->value = 0;

struct Huffman_Node Temp;

// quickly resort the remaining leafs and branches
for (c = 0 ; c < SubTrees - 1; c++){
for (d = 0 ; d < SubTrees - 1; d++){

if (node[d].value < node[d + 1].value){

memcpy(&Temp, &node[d], sizeof(struct Huffman_Node));
memcpy(&node[d], &node[d + 1], sizeof(struct Huffman_Node));
memcpy(&node[d + 1], &Temp, sizeof(struct Huffman_Node));
}
}
}
SubTrees--;
}

// tree is complete, return pointer to the tree
return LeafTwo;
}

/* builds the huffman table with the bits for each entry. 1 stands for binary 0 and 2 for binary 1*/
void build_huffman_code_table(uint32_t codeTable[], struct Huffman_Node * tree, int32_t Code){

if (tree->type == LEAF){

// assign code to leaf
codeTable[(int)tree->marker] = Code;
}
else{

// traverse the branches, 1 equals 0 and 2 equals 1
build_huffman_code_table(codeTable, tree->left, Code * 10 + 1);
build_huffman_code_table(codeTable, tree->right, Code * 10 + 2);
}
return;
}

for(int32_t i = 0; i < bits_remaining; i++){
}

}

static uint16_t codify(uint32_t code){

char data[34];

sprintf(data, "%33d", (int)code);

code = 0x00;

for(int32_t i = 32; i != 0; i--){

code = code >> 1;

if(data[i] == '2'){
code |= 0b10000000000000000000000000000000;
}
}

return code;
}

uint32_t encode(uint8_t * inRaw, uint32_t * outCoded, uint32_t * codeTable, uint32_t DataSize){

uint8_t bits_remaining = 32;
uint32_t compression_size = 0;

char buf[16];
int32_t nr_bits = 0;

while(DataSize--){

// get Huffman code
uint32_t code = codeTable[* inRaw++];

nr_bits = 0;

// calculate the size of the Huffman code
itoa(code, buf, 10); nr_bits = strlen(buf);

// create code byte, in binary
code = codify(code);

// if it fits in this byte then add it to the stream
if(nr_bits <= bits_remaining){

* outCoded = * outCoded << nr_bits;
* outCoded |= code;

bits_remaining -= nr_bits;

// new stream byte required
if(bits_remaining == 0){

bits_remaining = 32;

outCoded++;
compression_size++;
}
}

// does not fit in the stream byte
else{

// bytes remaining to add equals i
nr_bits = nr_bits - bits_remaining;

// add the top bits to the stream
* outCoded = * outCoded << bits_remaining;
* outCoded |= code >> nr_bits;

// remaining code to be added to the stream

outCoded++;
compression_size++;

bits_remaining = 32;

if(nr_bits <= bits_remaining){

// insert Code into stream
* outCoded = * outCoded << nr_bits;
* outCoded |= code;

// calculate bytes remaining
bits_remaining -= nr_bits;

// new stream byte required
if(bits_remaining == 0){

bits_remaining = 32;
outCoded++;
compression_size++;
}
}
}
}

* outCoded = * outCoded << bits_remaining;
return compression_size;
}

void decoder(uint32_t * inCoded, uint8_t * outRaw, struct Huffman_Node * Tree, uint32_t NrData){

uint32_t Bit;
struct Huffman_Node * node = Tree;

while(1){

for(int32_t i = 0; i < 32; i++){

// get test bit
Bit = * inCoded & 0b10000000000000000000000000000000;

// move to the next Bit
* inCoded = * inCoded << 1;

if(Bit)
node = node->right;
else
node = node->left;

// if end point32_t the output code, else keep traversing the tree
if(node->type == LEAF){

// get decoded char and reset the algorithm
* outRaw++ = node->marker; node = Tree;

// quit decoding if we have completed
if(NrData-- == 0)
return;
}
}
inCoded++;
}
}

void huffman_encoder(uint8_t * inRaw, uint32_t DataSize, char * outCoded){

struct Huffman_Node * tree;
uint32_t codeTable1[256] = {0};

outCoded = rtos_malloc(DataSize);

tree = build_huffman_graph(inRaw, DataSize);
build_huffman_code_table(codeTable1, tree, 0);

uint32_t test = encode(inRaw, (uint32_t *)outCoded, codeTable1, DataSize);
c_printf("\n\r[g]compressed size %i - %i\n\r", (int)DataSize, (int)test);

rtos_realloc(outCoded, test);

memset(inRaw, 0x00, DataSize);
decoder((uint32_t *)outCoded, inRaw, tree, DataSize);

for(int32_t i = 0; i < DataSize; i++, inRaw++)
c_printf("%c", * inRaw);
}
```

I hope someone can spot the error.

Thanks Freaks.

```compressed size 4984 - 702
Did it make a risky choice? Yes definitely. Is it better off? Definitely not, no
, and recent economic data show that pretty clearly. Comparing Q4 2019 (before t
he pandemic hit) to Q1 2022 (after it) only two regions are back above pre-pande
mic levels, London and Northern Ireland of Protocol fame. All other regions are
in the negative, some like the Western Midlands by some -10% of GDP. It is reall
y hard to call that â€œbetter offâ€ed. The numbers are really not there. Of cour
se, you may argue that economics is not everything and that it is a small price
to pay for a Global Britain, but first of all the price is not so small and seco
ndly the whole Global Britain thing is a bit of a rather bad joke. Certainly the
US, China, Japan even Russia are not terribly impressed, And then there is the
EU. Yes, it is still there, despite all the â€˜predictionsâ€™ to the contrary. I
n fact, it certainly does not look any less united than before. On the contrary.
It has countries like Croatia and Bulgaria preparing to join the eurozone and a
slew of candidates wanting, nay, demanding to join. A bit of a problem at times
, but rather a luxury problem if you consider the state of the Union of that kin
gdom of the isles that is now a third country. Did it makdddteotsdob rssdob rs d
efinitely. Is it better off? Definitely not, no, and recent economic data show t
hat pretty clearly. Comparing Q4 2019 (before the pandemic hit) to Q1 2022 (afte
r it) only two regions are back above pre-pandemic levels, London and Northern I
reland of Protocol fame. All other regions are in the negative, some like the We
stern Midlands by some -10% of GDP. It is really hard to call that â€œbetter off
â€ed. The numbers are really not there. Of course, you may argue that economics
is not everything and that it is a small price to pay for a Global Britain, but
first of all the price is not so small and secondly the whole Global Britain thi
ng is a bit of a rather bad joke. Certainly the US, China, Japan even Russia are
not terribly impressed, And then there is the EU. Yes, it is still there, despi
te all the â€˜predictionsâ€™ to the contrary. In fact, it certainly does not loo
k any less united than before. On the contrary. It has countries like Croatia an
d Bulgaria preparing to join the eurozone and a slew of candidates wanting, nay,
demanding to join. A bit of a problem at times, but rather a luxury problem if
you consider the state of the Union of that kingdom of the isles that is now a t
hird country. Did it make a risky choice? Yes definitely. Is it better off? Defi
nitely not, no, and recent economic data show that pretty clearly. Comparing Q4
2019 (before the pandemic hit) to Q1 2022 (after it) only two regions are back a
bove pre-pandemic levels, London and Northern Ireland of Protocol fame. All othe
r regions are in the negative, some like the Western Midlands by some -10% of GD
P. It is really hard to call that â€œbetter offâ€ed. The numbers are really not
there. Of course, you may argue that economics is not everything and that it is
a small price to pay for a Global Britain, but first of all the price is not so
small and secondly the whole Global Britain thing is a bit of a rather bad joke.
Certainly the US, China, Japan even Russia are not terribly impressed, And then
there is the EU. Yes, it is still there, despite all the â€˜predictionsâ€™ to t
he contrary. In fact, it certainly does not look any less united than before. On
the contrary. It has countries like Croatia and Bulgaria preparing to join the
eurozone and a slew of candidates wanting, nay, demanding to join. A bit of a pr
oblem at times, but rather a luxury problem if you consider the state of the Uni
on of that kingdom of the isles that is now a third country. Did it make a risky
choice? Yes definitely. Is it better off? Definitely not, no, and recent econom
ic data show that pretty clearly. Comparing Q4 2019 (before the pandemic hit) to
Q1 2022 (after it) only two regions are back above pre-pandemic levels, London
and Northern Ireland of Protocol fame. All other regions are in the negative, so
me like the Western Midlands by some -10% of GDP. It is really hard to call that
â€œbetter offâ€ed. The numbers are really not there. Of course, you may argue t
hat economics is not everything and that it is a small price to pay for a Global
Britain, but first of all the price is not so small and secondly the whole Glob
al Britain thing is a bit of a rather bad joke. Certainly the US, China, Japan e
ven Russia are not terribly impressed, And then there is the EU. Yes, it is stil
l there, despite all the â€˜predictionsâ€™ to the contrary. In fact, it certainl
y does not look any less united than before. On the contrary. It has countries l
ike Croatia and Bulgaria preparing to join the eurozone and a slew of candidates
wanting, nay, demanding to join. A bit of a problem at times, but rather a luxu
ry problem if you consider the state of the Union of that kingdom of the isles t
hat is now a third count```

This topic has a solution.
Last Edited: Thu. Jun 9, 2022 - 08:52 PM

As this Freak's website also has problems with character encoding it's not clear whether what you are describing is your actual problem or  a Freaks encoding thing so can you describe in words (not symbols) what you consider the problem to be?

Have a look at the screen shot where I have highlighted the text area.

Check the range on your for loops. There's a few were you only handle 255 of the 256 elements.

balisong42 wrote:

Check the range on your for loops. There's a few were you only handle 255 of the 256 elements.

Do that now.

This reply has been marked as the solution.