当前位置:文档之家› 五种数据压缩算法

五种数据压缩算法

⏹哈弗曼编码A method for the construction of minimum-re-dundancy codes,耿1数据结构1:高等教育,2005:182—190严蔚敏,吴伟民.数据结构(C语言版)[M].:清华大学,1997.桂,林其伟,东华.信息论与编码技术[M].:清华大学,2007.大有,唐海鹰,舒,等.数据结构[M].:高等教育,2001✧压缩实现速度要求为了让它(huffman.cpp)快速运行,同时不使用任何动态库,比如STL或者MFC。

它压缩1M数据少于100ms(P3处理器,主频1G)。

压缩过程压缩代码非常简单,首先用ASCII值初始化511个哈夫曼节点:CHuffmanNode nodes[511];for(int nCount = 0; nCount < 256; nCount++)nodes[nCount].byAscii = nCount;其次,计算在输入缓冲区数据中,每个ASCII码出现的频率:for(nCount = 0; nCount < nSrcLen; nCount++)nodes[pSrc[nCount]].nFrequency++;然后,根据频率进行排序:qsort(nodes, 256, sizeof(CHuffmanNode), frequencyCompare);哈夫曼树,获取每个ASCII码对应的位序列:int nNodeCount = GetHuffmanTree(nodes);构造哈夫曼树构造哈夫曼树非常简单,将所有的节点放到一个队列中,用一个节点替换两个频率最低的节点,新节点的频率就是这两个节点的频率之和。

这样,新节点就是两个被替换节点的父节点了。

如此循环,直到队列中只剩一个节点(树根)。

// parent nodepNode = &nodes[nParentNode++];// pop first childpNode->pLeft = PopNode(pNodes, nBackNode--, false);// pop second childpNode->pRight = PopNode(pNodes, nBackNode--, true);// adjust parent of the two poped nodespNode->pLeft->pParent = pNode->pRight->pParent = pNode;// adjust parent frequencypNode->nFrequency=pNode->pLeft->nFrequency + pNode->pRight->nFrequency 注意事项有一个好的诀窍来避免使用任何队列组件。

ASCII码只有256个,但实际分配了511个(CHuffmanNode nodes[511]),前255个记录ASCII码,而用后255个记录哈夫曼树中的父节点。

并且在构造树的时候只使用一个指针数组(ChuffmanNode *pNodes[256])来指向这些节点。

同样使用两个变量来操作队列索引(int nParentNode = nNodeCount;nBackNode = nNodeCount –1)。

接着,压缩的最后一步是将每个ASCII编码写入输出缓冲区中:int nDesIndex = 0;// loop to write codesfor(nCount = 0; nCount < nSrcLen; nCount++){*(DWORD*)(pDesPtr+(nDesIndex>>3)) |=nodes[pSrc[nCount]].dwCode << (nDesIndex&7);nDesIndex += nodes[pSrc[nCount]].nCodeLength;}(nDesIndex>>3): >>3 以8位为界限右移后到达右边字节的前面(nDesIndex&7): &7 得到最高位.此外,在压缩缓冲区中,必须保存哈夫曼树的节点以及位序列,这样才能在解压缩时重新构造哈夫曼树(只需保存ASCII值和对应的位序列)。

解压缩解压缩比构造哈夫曼树要简单的多,将输入缓冲区中的每个编码用对应的ASCII码逐个替换就可以了。

只要记住,这里的输入缓冲区是一个包含每个ASCII值的编码的位流。

因此,为了用ASCII值替换编码,我们必须用位流搜索哈夫曼树,直到发现一个叶节点,然后将它的ASCII值添加到输出缓冲区中:int nDesIndex = 0;DWORD nCode;while(nDesIndex < nDesLen){nCode = (*(DWORD*)(pSrc+(nSrcIndex>>3)))>>(nSrcIndex&7);pNode = pRoot;while(pNode->pLeft){pNode = (nCode&1) ? pNode->pRight : pNode->pLeft;nCode >>= 1;nSrcIndex++;}pDes[nDesIndex++] = pNode->byAscii;}程序实现费诺编码#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#define M 100typedef struct Fano_Node{char ch;float weight;}FanoNode[M];typedef struct node{int start;int end;struct node *next;}LinkQueueNode;typedef struct{LinkQueueNode *front;LinkQueueNode *rear;}LinkQueue;//建立队列void EnterQueue(LinkQueue *q,int s,int e){LinkQueueNode *NewNode;//生成新节点NewNode=(LinkQueueNode*)malloc(sizeof( LinkQueueNode )); if(NewNode!=NULL){NewNode->start=s;NewNode->end=e;NewNode->next=NULL;q->rear->next=NewNode;q->rear=NewNode;}else{printf("Error!");exit(-1);}}//按权分组void Divide(FanoNode f,int s,int *m,int e){int i;float sum,sum1;sum=0;for(i=s;i<=e;i++)sum+=f[i].weight;//*m=s;sum1=0;for(i=s;i<e;i++){sum1+=f[i].weight;*m=fabs(sum-2*sum1)>fabs(sum-2*sum1-2*f[i+1].weight)?(i+1):*m; if(*m==i) break;}}void main(){int i,j,n,max,m,h[M];int sta,end;float w;char c,fc[M][M];FanoNode FN;LinkQueueNode *p;LinkQueue *Q;//初始化队QQ=(LinkQueue *)malloc(sizeof(LinkQueue));Q->front=(LinkQueueNode*)malloc(sizeof(LinkQueueNode)); Q->rear=Q->front;Q->front->next=NULL;printf("\t***FanoCoding***\n");printf("Please input the number of node:");//输入信息scanf("%d",&n);//超过定义M,退出if(n>=M){printf(">=%d",M);exit(-1);}i=1; //从第二个元素开始录入while(i<=n){printf("%d weight and node:",i);scanf("%f %c",&FN[i].weight,&FN[i].ch); for(j=1;j<i;j++){if(FN[i].ch==FN[j].ch)//查找重复{printf("Same node!!!\n"); break;}}if(i==j)i++;}//排序(降序)for(i=1;i<=n;i++){max=i+1;for(j=max;j<=n;j++)max=FN[max].weight<FN[j].weight?j:max; if(FN[i].weight<FN[max].weight){w=FN[i].weight;FN[i].weight=FN[max].weight;FN[max].weight=w;c=FN[i].ch;FN[i].ch=FN[max].ch;FN[max].ch=c;}for(i=1;i<=n;i++) //初始化hh[i]=0;EnterQueue(Q,1,n); //1和n进队while(Q->front->next!=NULL){p=Q->front->next; //出队Q->front->next=p->next;if(p==Q->rear)Q->rear=Q->front;sta=p->start;end=p->end;free(p);Divide(FN,sta,&m,end); /*按权分组*/ for(i=sta;i<=m;i++){fc[i][h[i]]='0';++h[i];}if(sta!=m)EnterQueue(Q,sta,m);elsefc[sta][h[sta]]='\0';for(i=m+1;i<=end;i++){fc[i][h[i]]='1';++h[i];if(m==sta&&(m+1)==end)//如果分组后首元素的下标与中间元素的相等,//并且和最后元素的下标相差为1,则编码码字字符串结束{fc[m][h[m]]='\0';fc[end][h[end]]='\0';}elseEnterQueue(Q,m+1,end);}for(i=1;i<=n;i++) /*打印编码信息*/{printf("%c:",FN[i].ch);printf("%s\n",fc[i]);}system("pause");}[4]编码解码#include <stdio.h>#include <stdlib.h>#include <string.h>#define N 100#define M 2*N-1typedef char * HuffmanCode[2*M];//haffman编码typedef structint weight;//权值int parent;//父节节点int LChild;//左子节点int RChild;//右子节点}HTNode,Huffman[M+1];//huffman树typedef struct Node{int weight; //叶子结点的权值char c; //叶子结点int num; //叶子结点的二进制码的长度}WNode,WeightNode[N];/***产生叶子结点的字符和权值***/void CreateWeight(char ch[],int *s,WeightNode CW,int *p) {int i,j,k;int tag;*p=0;//叶子节点个数//统计字符出现个数,放入CWfor(i=0;ch[i]!='\0';i++){tag=1;for(j=0;j<i;j++)if(ch[j]==ch[i]){tag=0;break;}if(tag){CW[++*p].c=ch[i];CW[*p].weight=1;for(k=i+1;ch[k]!='\0';k++)if(ch[i]==ch[k])CW[*p].weight++;//权值累加}}*s=i;//字符串长度}/********创建HuffmanTree********/void CreateHuffmanTree(Huffman ht,WeightNode w,int n) {int i,j;int s1,s2;//初始化哈夫曼树for(i=1;i<=n;i++){ht[i].weight =w[i].weight;ht[i].parent=0;ht[i].LChild=0;ht[i].RChild=0;}for(i=n+1;i<=2*n-1;i++){ht[i].weight=0;ht[i].parent=0;ht[i].LChild=0;ht[i].RChild=0;}for(i=n+1;i<=2*n-1;i++){for(j=1;j<=i-1;j++)if(!ht[j].parent)break;s1=j; //找到第一个双亲为零的结点for(;j<=i-1;j++)if(!ht[j].parent)s1=ht[s1].weight>ht[j].weight?j:s1;ht[s1].parent=i;ht[i].LChild=s1;for(j=1;j<=i-1;j++)if(!ht[j].parent)break;s2=j; //找到第二个双亲为零的结点for(;j<=i-1;j++)if(!ht[j].parent)s2=ht[s2].weight>ht[j].weight?j:s2;ht[s2].parent=i;ht[i].RChild=s2;ht[i].weight=ht[s1].weight+ht[s2].weight;//权值累加}/***********叶子结点的编码***********/void CrtHuffmanNodeCode(Huffman ht,char ch[],HuffmanCode h,WeightNode weight,int m,int n){int i,c,p,start;char *cd;cd=(char *)malloc(n*sizeof(char));cd[n-1]='\0';//末尾置0for(i=1;i<=n;i++){start=n-1; //cd串每次从末尾开始c=i;p=ht[i].parent;//p在n+1至2n-1while(p) //沿父亲方向遍历,直到为0{start--;//依次向前置值if(ht[p].LChild==c)//与左子相同,置0cd[start]='0';else //否则置1cd[start]='1';c=p;p=ht[p].parent;}weight[i].num=n-start; //二进制码的长度(包含末尾0)h[i]=(char *)malloc((n-start)*sizeof(char));strcpy(h[i],&cd[start]);//将二进制字符串拷贝到指针数组h中free(cd);//释放cd存system("pause");}/*********所有字符的编码*********/void CrtHuffmanCode(char ch[],HuffmanCode h,HuffmanCode hc,WeightNode weight,int n,int m){int i,k;for(i=0;i<m;i++){for(k=1;k<=n;k++) /*从weight[k].c中查找与ch[i]相等的下标K*/if(ch[i]==weight[k].c)break;hc[i]=(char *)malloc((weight[k].num)*sizeof(char));strcpy(hc[i],h[k]); //拷贝二进制编码}}/*****解码*****/void TrsHuffmanTree(Huffman ht,WeightNode w,HuffmanCode hc,int n,int m){int i=0,j,p;printf("***StringInformation***\n");while(i<m){p=2*n-1;//从父亲节点向下遍历直到叶子节点for(j=0;hc[i][j]!='\0';j++)if(hc[i][j]=='0')p=ht[p].LChild;elsep=ht[p].RChild;}printf("%c",w[p].c); /*打印原信息*/i++;}}/*****释放huffman编码存*****/void FreeHuffmanCode(HuffmanCode h,HuffmanCode hc,int n,int m) {int i;for(i=1;i<=n;i++)//释放叶子结点的编码free(h[i]);for(i=0;i<m;i++) //释放所有结点的编码free(hc[i]);}void main(){int i,n=0; /*n为叶子结点的个数*/int m=0; /*m为字符串ch[]的长度*/char ch[N]; /*ch[N]存放输入的字符串*/Huffman ht; /*Huffman二叉数*/HuffmanCode h,hc; /*h存放叶子结点的编码,hc 存放所有结点的编码*/ WeightNode weight; /*存放叶子结点的信息*/printf("\t***HuffmanCoding***\n");printf("please input information :");gets(ch); /*输入字符串*/CreateWeight(ch,&m,weight,&n); /*产生叶子结点信息,m为字符串ch[]的长度*/printf("***WeightInformation***\n Node ");for(i=1;i<=n;i++) /*输出叶子结点的字符与权值*/printf("%c ",weight[i].c);printf("\nWeight ");for(i=1;i<=n;i++)printf("%d ",weight[i].weight);CreateHuffmanTree(ht,weight,n); /*产生Huffman树*/printf("\n***HuffamnTreeInformation***\n");printf("\ti\tweight\tparent\tLChild\tRChild\n");for(i=1;i<=2*n-1;i++) /*打印Huffman树的信息*/printf("\t%d\t%d\t%d\t%d\t%d\n",i,ht[i].weight,ht[i].parent,ht[i].LChil d,ht[i].RChild);CrtHuffmanNodeCode(ht,ch,h,weight,m,n); /*叶子结点的编码*/printf(" ***NodeCode***\n"); /*打印叶子结点的编码*/for(i=1;i<=n;i++){printf("\t%c:",weight[i].c);printf("%s\n",h[i]);}CrtHuffmanCode(ch,h,hc,weight,n,m); /*所有字符的编码*/printf("***StringCode***\n"); /*打印字符串的编码*/for(i=0;i<m;i++)printf("%s",hc[i]);system("pause");TrsHuffmanTree(ht,weight,hc,n,m); /*解码*/FreeHuffmanCode(h,hc,n,m);system("pause");}[5]Matlab实现Matlab 中简易实现Huffman编译码:n=input('Please input the total number: ');hf=zeros(2*n-1,5);hq=[];for ki=1:nhf(ki,1)=ki;hf(ki,2)=input('Please input the frequency: ');hq=[hq,hf(ki,2)];endfor ki=n+1:2*n-1hf(ki,1)=ki;mhq1=min(hq);m=size(hq);m=m(:,2);k=1;while k<=m%del min1if hq(:,k)==mhq1hq=[hq(:,1:(k-1)) hq(:,(k+1):m)];m=m-1;elsek=k+1;endendk=1;while hf(k,2)~=mhq1|hf(k,5)==1%find min1 location k=k+1;endhf(k,5)=1;k1=k;mhq2=min(hq);k=1;while k<=m%del min2if hq(:,k)==mhq2hq=[hq(:,1:(k-1)) hq(:,(k+1):m)];m=m-1;breakelsek=k+1;endendk=1;while hf(k,2)~=mhq2|hf(k,5)==1%find min2 location k=k+1;endhf(k,5)=1;hf(ki,2)=mhq1+mhq2;hf(ki,3)=k1;hf(ki,4)=k2;hq=[hq hf(ki,2)];endclcchoose=input('Please choose what you want:\n1: Encoding\n2: Decoding\n3:.Exit\n');while choose==1|choose==2if choose==1a=input('Please input the letter you want to Encoding: ');k=1;while hf(k,2)~=ak=k+1;if k>=ndisplay('Error! You did not input this number.');breakendendif k>=nbreakendr=[];while hf(k,5)==1kc=n+1;while hf(kc,3)~=k&hf(kc,4)~=kkc=kc+1;endif hf(kc,3)==kr=[0 r];elser=[1 r];endk=kc;endrelsea=input('Please input the metrix you want to Decoding: '); sa=size(a);sa=sa(:,2);k=2*n-1;while sa~=0if a(:,1)==0k=hf(k,3);elsek=hf(k,4);enda=a(:,2:sa);sa=sa-1;if k==0display('Error! The metrix you entered is a wrong one.'); breakendendif k==0breakendr=hf(k,2);endchoose=input('Choose what you want:\n1: Encoding\n2: Decoding\n3:.Exit\n');clc;endif choose~=1&choose~=2clc;end⏹LZ变换✧LZ77压缩算法原理1977年,Jacob Ziv和Abraham Lempel描述了一种基于滑动窗口缓存的技术,该缓存用于保存最近刚刚处理的文本(J. Ziv and A. Lempel, “A Universal Algorithm for Sequential Data Compression”, IEEE Transaction on Information Theory, May 1977)。

相关主题