Guys, I have designed a compression routine that uses the Huffman tree. It appears to work well enough but some characters such as "despite all the ‘predictions’ to the contrary" comes out "despite all the ‘predictions’ to the contrary".
#include "../../applications/compression/huffman.h" #include <stdlib.h> #include "stdint.h" #include "string.h" #include "stdio.h" #include "../../arm_rtos/system_malloc.h" #include "../../arm_rtos/serial_console/c_printf.h" /** In computer science and information theory, a Huffman code is a particular * type of optimal prefix code that is commonly used for lossless data * compression. The process of finding or using such a code proceeds by means * of Huffman coding, an algorithm developed by David A. Huffman while he * was a Sc.D. student at MIT, and published in the 1952 paper "A Method for * the Construction of Minimum-Redundancy Codes".[1] */ int32_t SubTrees; static struct Huffman_Node * retrieve_small_leaf1(struct Huffman_Node * node, int32_t OffSet) { for (int32_t i = OffSet; ((i != 0) && (node->value == 0)); i--, node--); return node; } static struct Huffman_Node * retrieve_small_leaf2(struct Huffman_Node * node, int32_t OffSet) { for (int32_t i = OffSet; ((i != 0) && (node->value == 0)); i--, node--); return node - 1; } struct Huffman_Node * build_huffman_graph(uint8_t * inRaw, uint32_t Data){ struct RawDat histogram[256] = {0}; // clear binary histogram for(int32_t i = 0; i < 255; i++){ histogram[i].repitions = 0; histogram[i].dat = 0; } // derive binary histogram from raw data for(int32_t i = 0; i < Data; i++, inRaw++){ histogram[(uint32_t)* inRaw].repitions++; histogram[(uint32_t)* inRaw].dat = * inRaw; } // find the number of active sub trees for(int32_t g = 0; g != 255; g++){ if(histogram[g].repitions != 0){ SubTrees++; } } struct RawDat swap; int32_t c, d; // bubble sort the histogram for (c = 0 ; c < 255 - 1; c++){ for (d = 0 ; d < 255 - 1; d++){ if ((histogram[d].repitions < histogram[d + 1].repitions)){ memcpy(&swap, &histogram[d], sizeof(struct RawDat)); memcpy(&histogram[d], &histogram[d + 1], sizeof(struct RawDat)); memcpy(&histogram[d + 1], &swap, sizeof(struct RawDat)); } } } // fill in the data for each leaf struct Huffman_Node * node = rtos_malloc((sizeof(struct Huffman_Node) * SubTrees)); for (int32_t f = 0; f < SubTrees; f++){ node[f].type = LEAF; node[f].value = histogram[f].repitions; node[f].marker = histogram[f].dat; node[f].left = 0; node[f].right = 0; } struct Huffman_Node * LeafOne = NULL; struct Huffman_Node * LeafTwo = NULL; int32_t OffSet = SubTrees; while(SubTrees - 1){ // get the two lowest leafs/branchs LeafOne = retrieve_small_leaf1(&node[OffSet - 1], OffSet); LeafTwo = retrieve_small_leaf2(&node[OffSet - 1], OffSet); struct Huffman_Node * leaf2 = rtos_malloc(sizeof(struct Huffman_Node)); memcpy(leaf2, LeafTwo, sizeof(struct Huffman_Node)); // create new branch, this branch is also in contention LeafTwo->type = BRANCH; LeafTwo->value = LeafOne->value + leaf2->value; LeafTwo->left = leaf2; LeafTwo->right = LeafOne; LeafTwo->marker = '#'; // remove from contention LeafOne->value = 0; struct Huffman_Node Temp; // quickly resort the remaining leafs and branches for (c = 0 ; c < SubTrees - 1; c++){ for (d = 0 ; d < SubTrees - 1; d++){ if (node[d].value < node[d + 1].value){ memcpy(&Temp, &node[d], sizeof(struct Huffman_Node)); memcpy(&node[d], &node[d + 1], sizeof(struct Huffman_Node)); memcpy(&node[d + 1], &Temp, sizeof(struct Huffman_Node)); } } } SubTrees--; } // tree is complete, return pointer to the tree return LeafTwo; } /* builds the huffman table with the bits for each entry. 1 stands for binary 0 and 2 for binary 1*/ void build_huffman_code_table(uint32_t codeTable[], struct Huffman_Node * tree, int32_t Code){ if (tree->type == LEAF){ // assign code to leaf codeTable[(int)tree->marker] = Code; } else{ // traverse the branches, 1 equals 0 and 2 equals 1 build_huffman_code_table(codeTable, tree->left, Code * 10 + 1); build_huffman_code_table(codeTable, tree->right, Code * 10 + 2); } return; } static uint32_t mask(uint32_t bits_remaining){ uint32_t mask = 0; for(int32_t i = 0; i < bits_remaining; i++){ mask = mask << 1; mask |= 0b00000001; } return mask; } static uint16_t codify(uint32_t code){ char data[34]; sprintf(data, "%33d", (int)code); code = 0x00; for(int32_t i = 32; i != 0; i--){ code = code >> 1; if(data[i] == '2'){ code |= 0b10000000000000000000000000000000; } } return code; } uint32_t encode(uint8_t * inRaw, uint32_t * outCoded, uint32_t * codeTable, uint32_t DataSize){ uint8_t bits_remaining = 32; uint32_t compression_size = 0; char buf[16]; int32_t nr_bits = 0; while(DataSize--){ // get Huffman code uint32_t code = codeTable[* inRaw++]; nr_bits = 0; // calculate the size of the Huffman code itoa(code, buf, 10); nr_bits = strlen(buf); // create code byte, in binary code = codify(code); // if it fits in this byte then add it to the stream if(nr_bits <= bits_remaining){ * outCoded = * outCoded << nr_bits; * outCoded |= code; bits_remaining -= nr_bits; // new stream byte required if(bits_remaining == 0){ bits_remaining = 32; outCoded++; compression_size++; } } // does not fit in the stream byte else{ // bytes remaining to add equals i nr_bits = nr_bits - bits_remaining; // add the top bits to the stream * outCoded = * outCoded << bits_remaining; * outCoded |= code >> nr_bits; // remaining code to be added to the stream code = code & mask(nr_bits); // next byte,load outCoded++; compression_size++; bits_remaining = 32; if(nr_bits <= bits_remaining){ // insert Code into stream * outCoded = * outCoded << nr_bits; * outCoded |= code; // calculate bytes remaining bits_remaining -= nr_bits; // new stream byte required if(bits_remaining == 0){ bits_remaining = 32; outCoded++; compression_size++; } } } } //pad remaining integer code * outCoded = * outCoded << bits_remaining; return compression_size; } void decoder(uint32_t * inCoded, uint8_t * outRaw, struct Huffman_Node * Tree, uint32_t NrData){ uint32_t Bit; struct Huffman_Node * node = Tree; while(1){ for(int32_t i = 0; i < 32; i++){ // get test bit Bit = * inCoded & 0b10000000000000000000000000000000; // move to the next Bit * inCoded = * inCoded << 1; if(Bit) node = node->right; else node = node->left; // if end point32_t the output code, else keep traversing the tree if(node->type == LEAF){ // get decoded char and reset the algorithm * outRaw++ = node->marker; node = Tree; // quit decoding if we have completed if(NrData-- == 0) return; } } inCoded++; } } void huffman_encoder(uint8_t * inRaw, uint32_t DataSize, char * outCoded){ struct Huffman_Node * tree; uint32_t codeTable1[256] = {0}; outCoded = rtos_malloc(DataSize); tree = build_huffman_graph(inRaw, DataSize); build_huffman_code_table(codeTable1, tree, 0); uint32_t test = encode(inRaw, (uint32_t *)outCoded, codeTable1, DataSize); c_printf("\n\r[g]compressed size %i - %i\n\r", (int)DataSize, (int)test); rtos_realloc(outCoded, test); memset(inRaw, 0x00, DataSize); decoder((uint32_t *)outCoded, inRaw, tree, DataSize); for(int32_t i = 0; i < DataSize; i++, inRaw++) c_printf("%c", * inRaw); }
I hope someone can spot the error.
Thanks Freaks.
compressed size 4984 - 702 Did it make a risky choice? Yes definitely. Is it better off? Definitely not, no , and recent economic data show that pretty clearly. Comparing Q4 2019 (before t he pandemic hit) to Q1 2022 (after it) only two regions are back above pre-pande mic levels, London and Northern Ireland of Protocol fame. All other regions are in the negative, some like the Western Midlands by some -10% of GDP. It is reall y hard to call that “better offâ€ed. The numbers are really not there. Of cour se, you may argue that economics is not everything and that it is a small price to pay for a Global Britain, but first of all the price is not so small and seco ndly the whole Global Britain thing is a bit of a rather bad joke. Certainly the US, China, Japan even Russia are not terribly impressed, And then there is the EU. Yes, it is still there, despite all the ‘predictions’ to the contrary. I n fact, it certainly does not look any less united than before. On the contrary. It has countries like Croatia and Bulgaria preparing to join the eurozone and a slew of candidates wanting, nay, demanding to join. A bit of a problem at times , but rather a luxury problem if you consider the state of the Union of that kin gdom of the isles that is now a third country. Did it makdddteotsdob rssdob rs d efinitely. Is it better off? Definitely not, no, and recent economic data show t hat pretty clearly. Comparing Q4 2019 (before the pandemic hit) to Q1 2022 (afte r it) only two regions are back above pre-pandemic levels, London and Northern I reland of Protocol fame. All other regions are in the negative, some like the We stern Midlands by some -10% of GDP. It is really hard to call that “better off â€ed. The numbers are really not there. Of course, you may argue that economics is not everything and that it is a small price to pay for a Global Britain, but first of all the price is not so small and secondly the whole Global Britain thi ng is a bit of a rather bad joke. Certainly the US, China, Japan even Russia are not terribly impressed, And then there is the EU. Yes, it is still there, despi te all the ‘predictions’ to the contrary. In fact, it certainly does not loo k any less united than before. On the contrary. It has countries like Croatia an d Bulgaria preparing to join the eurozone and a slew of candidates wanting, nay, demanding to join. A bit of a problem at times, but rather a luxury problem if you consider the state of the Union of that kingdom of the isles that is now a t hird country. Did it make a risky choice? Yes definitely. Is it better off? Defi nitely not, no, and recent economic data show that pretty clearly. Comparing Q4 2019 (before the pandemic hit) to Q1 2022 (after it) only two regions are back a bove pre-pandemic levels, London and Northern Ireland of Protocol fame. All othe r regions are in the negative, some like the Western Midlands by some -10% of GD P. It is really hard to call that “better offâ€ed. The numbers are really not there. Of course, you may argue that economics is not everything and that it is a small price to pay for a Global Britain, but first of all the price is not so small and secondly the whole Global Britain thing is a bit of a rather bad joke. Certainly the US, China, Japan even Russia are not terribly impressed, And then there is the EU. Yes, it is still there, despite all the ‘predictions’ to t he contrary. In fact, it certainly does not look any less united than before. On the contrary. It has countries like Croatia and Bulgaria preparing to join the eurozone and a slew of candidates wanting, nay, demanding to join. A bit of a pr oblem at times, but rather a luxury problem if you consider the state of the Uni on of that kingdom of the isles that is now a third country. Did it make a risky choice? Yes definitely. Is it better off? Definitely not, no, and recent econom ic data show that pretty clearly. Comparing Q4 2019 (before the pandemic hit) to Q1 2022 (after it) only two regions are back above pre-pandemic levels, London and Northern Ireland of Protocol fame. All other regions are in the negative, so me like the Western Midlands by some -10% of GDP. It is really hard to call that “better offâ€ed. The numbers are really not there. Of course, you may argue t hat economics is not everything and that it is a small price to pay for a Global Britain, but first of all the price is not so small and secondly the whole Glob al Britain thing is a bit of a rather bad joke. Certainly the US, China, Japan e ven Russia are not terribly impressed, And then there is the EU. Yes, it is stil l there, despite all the ‘predictions’ to the contrary. In fact, it certainl y does not look any less united than before. On the contrary. It has countries l ike Croatia and Bulgaria preparing to join the eurozone and a slew of candidates wanting, nay, demanding to join. A bit of a problem at times, but rather a luxu ry problem if you consider the state of the Union of that kingdom of the isles t hat is now a third count