首页 > 编程 > C++ > 正文

C++ 哈夫曼树对文件压缩、加密实现代码

2020-01-26 13:57:26
字体:
来源:转载
供稿:网友

在以前写LZW压缩算法的时候,遇到很多难受的问题,基本上都在哈夫曼编码中解决了,虽然写这代码很费神,但还是把代码完整的码出来了,毕竟哈夫曼这个思想确实很牛逼。哈夫曼树很巧妙的解决了当时我在LZW序列化的时候想解决的问题,就是压缩后文本的分割。比如用lzw编码abc,就是1,2,3。但这个在存为文件的时候必须用分割符把1,2,3分割开,非常浪费空间,否则会和12 23 123 产生二义性。而哈夫曼树,将所有char分布在叶节点上,在还原的时候,比如1101110,假设110是叶节点,那么走到110的时候就可以确定,已经走到尽头,回到根节点继续走,这样就避免了字符的分割,全部用1010101010101这样的路径表示字符,可以将8位压缩为1个char进行存储。在构造树的时候,将出现率高的char放在上面,这样路径就很短,自然就节省了存储空间。虽然哈夫曼压缩效率不是最高的,但还算比较乐观的。

哈夫曼除了压缩以外还可以用于加密,在将文本用哈夫曼编码时,需持久化生成的char计数链表结构,这样才能还原出树结构,而解码时路径正是依赖于树结构的。也就是说,这种编码是属于约定形式的编码,在编码时用原文本产生树结构,而存储的是树路径,解码的时候缺少树或树结构与原先不相符都是无法完成解码的,就好比,我用10代表a,你存的是10,你将10解释为 b或c等等都是不正确的。由于转换为了char存储,所以还需持久化最后填充的数目、文本长度,才能还原出原先的01表示的文本格式

这个代码有一定缺陷,由于当时考虑的是对文本进行处理,当文件中有char='/0' 时会出现错误,这个代码打的很费神,就不继续修复了,如有需要,可自行更改,解决的办法应该挺多的

先来个运行图:

源代码

#include<iostream> #include<sstream> #include<fstream>  void WriteFile(char* path,const char* content,int length,bool append=false); using namespace std; struct Node{    char data;   Node* left;   Node* right;  };  struct L_Node{   int count;   Node* node;   L_Node* next; };  Node* AddNode(int count,char data,L_Node*& first){   L_Node* lnode=new L_Node();   lnode->count=count;   Node* node=new Node();   node->data=data;   node->left=0;   node->right=0;   lnode->node=node;   if(first==0){     first=lnode;   }   else{     if(lnode->count<first->count){       lnode->next=first;       first=lnode;     }     else{       L_Node* iter=first;              while(iter->next!=0&&iter->next->count<lnode->count){         iter=iter->next;       }              if(iter->next==0){         iter->next=lnode;         lnode->next=0;       }       else{         lnode->next=iter->next;         iter->next=lnode;       }     }   }   return node; }  void SaveLNodes(L_Node* first){   stringstream ss;   while(first!=0){     ss<<(int)(unsigned char)first->node->data<<':'<<first->count<<' ';     first=first->next;   }   WriteFile("nodes.txt",ss.str().c_str(),ss.str().length()); }  void GetLNodes(L_Node*& first){   char temp[32];   ifstream in;   in.open("nodes.txt",ios::in|ios::binary);   while(!in.eof()){     temp[0]=0;     in>>temp;     if(strlen(temp)>0){       char* data=strtok(temp,":");       char* count=strtok(0,":");       AddNode(atoi(count),atoi(data),first);     }        } }  void BuildSortedList(char* content,L_Node*& first,int length){   int array[256]={     0   };    for(int i=0;i<length;i++){     array[(unsigned char)content[i]]++;   }    for(int i=0;i<256;i++){     if(array[i]>0){       AddNode(array[i],(char)i,first);     }   }   SaveLNodes(first); }  void BuildTree(L_Node*& first,Node*& root){//get l1->node,l2->node,remove l1,l2,then put l3 into list,then set l3->left and l3->right   if(first->next==0){     Node* node=new Node();     root=node;     root->right=0;     node=new Node();     node->data=first->node->data;     node->left=0;     node->right=0;     root->left=node;     delete first;     return;   }         while(first->next!=0){     int count=first->count+first->next->count;     Node* node1=first->node;     L_Node* temp=first;     first=first->next;     delete temp;     Node* node2=first->node;     temp=first;     delete temp;     first=first->next;     root=AddNode(count,0,first);     root->left=node1;     root->right=node2;     //cout<<(int)node1->data<<':'<<(int)node2->data<<endl;   }   delete first; }  void PreOrderTraversal(Node* node,char* track,int branch,char** table){   if(node!=0){          char* track2=0;          if(branch==0){       track2=new char[strlen(track)+2];       sprintf(track2,"%s0/0",track);     }     else if(branch==1){       track2=new char[strlen(track)+2];       sprintf(track2,"%s1/0",track);     }     else{       track2=new char[strlen(track)+1];       sprintf(track2,"%s/0",track);     }        if(node->data!=0){       table[(unsigned char)node->data]=track2;     }          PreOrderTraversal(node->left,track2,0,table);     PreOrderTraversal(node->right,track2,1,table);             if(node->data==0){       delete track2;     }   } }  void PreOrderTraversal(Node* node){   if(node!=0){     cout<<(int)(unsigned char)node->data<<endl;     PreOrderTraversal(node->left);     PreOrderTraversal(node->right);   } }  char* Encode(const char* content,char** table,int length){      stringstream ss;    for(int i=0;i<length;i++){     if((unsigned char)content[i]==0){      }     else{       ss<<table[(unsigned char)content[i]];      }   }         char* encoded_content=new char[ss.str().length()+1];   memcpy(encoded_content,ss.str().c_str(),ss.str().length());   encoded_content[ss.str().length()]=0;   return encoded_content; }  int BinToDec(char* bin_content){   int number=0;   int cur=1;    for(int i=7;i>=0;i--){     number+=(bin_content[i]-'0')*cur;     cur*=2;   }   return number; }   char* BinToCharText(const char* bin_content,int& fill_count,int& save_length){   int length=strlen(bin_content);      fill_count=8-length%8;    if(fill_count>0){     char* temp=new char[length+fill_count+1];          char temp1[fill_count];     for(int i=0;i<fill_count;i++){       temp1[i]='0';     }          sprintf(temp,"%s%s",bin_content,temp1);     temp[length+fill_count]=0;     bin_content=temp;   }      length+=fill_count;      save_length=length/8;    char* text=new char[length/8+1];   for(int i=0;i<length;i+=8){     char temp[8];     memcpy(temp,bin_content+i,8);     text[i/8]=(char)BinToDec(temp);       }   text[length/8]=0;      if(fill_count>0){     delete bin_content;   }      return text; }  char* DecToBin(int num){   char* bin=new char[8];   if(num<0){     num=256+num;   }      for(int i=7;i>=0;i--){     bin[i]=num%2+'0';     num/=2;   }   return bin; }  char* CharTextToBin(char* text,int fill_count,int save_length){   int length=save_length;    char* content=new char[8*length+1];      int pos=0;   for(int i=0;i<length;i++){     int number=text[i];     char* bin=DecToBin(number);     memcpy(content+pos,bin,8);     pos+=8;     delete bin;   }      content[8*length-fill_count]=0;    return content; }  char* Decode(const char* encode_content,Node* tree){   stringstream ss;   Node* node=tree;    for(int i=0;i<strlen(encode_content);i++){     if(encode_content[i]=='0'){       node=node->left;     }     else if(encode_content[i]=='1'){       node=node->right;     }      if(node->data!=0){       ss<<node->data;       node=tree;     }   }   char* decode_content=new char[ss.str().length()+1];   memcpy(decode_content,ss.str().c_str(),ss.str().length());   decode_content[ss.str().length()]=0;   return decode_content; }   void ReleaseTable(char** table){   for(int i=0;i<256;i++){     if(table[i]!=0){       delete table[i];     }   } }  void PostOrderTraversal(Node* node){   if(node!=0){     PostOrderTraversal(node->left);     PostOrderTraversal(node->right);     delete node;   } }   char* ReadFile(char* path,long& length){   char* content=0;   ifstream in;   in.open(path,ios::in|ios::binary);   in.seekg(0,ios::end);   length=in.tellg();   content=new char[length+1];   in.seekg(0,ios::beg);   int i=0;   while(!in.eof()){     content[i++]=in.get();   }   content[length]=0;   in.close();   return content; }  char* ReadFile(char* path,int& fill_count,int& save_length){   char* content=0;   ifstream in;   in.open(path,ios::in|ios::binary);   in>>fill_count>>save_length;   long cur=in.tellg()+(long)1;   in.seekg(0,ios::end);   long length=in.tellg()-cur;   content=new char[length+1];   in.seekg(cur,ios::beg);   int i=0;   while(!in.eof()){     content[i++]=in.get();   }   content[length]=0;   in.close();   return content; }  void WriteFile(char* path,const char* content,int length,bool append){   ofstream out;   if(append){     out.open(path,ios::out|ios::binary|ios::app);   }   else{     out.open(path,ios::out|ios::binary);   }      out.write(content,length);   out.close(); }  int main(){   long length;   char* content=ReadFile("content.txt",length);    L_Node* first=0;      BuildSortedList(content,first,length); //create nodes list and save to nodes file   //GetLNodes(first);//get and recreate nodes from file    Node* root=0;//used for buildtable and decode   BuildTree(first,root);//build tree by nodes list and release sorted list      char* table[256]={//build table,used for encode     0   };    PreOrderTraversal(root,"",-1,table);//create table       char* encode_content=Encode(content,table,length);//convert content to encoded bin text   cout<<encode_content<<endl;   delete content;      ReleaseTable(table);//release table when encode finished         int fill_count;   int save_length;   char* save_text=BinToCharText(encode_content,fill_count,save_length);//convert encoded bin text to char text and save these text to file   delete encode_content;   char head_info[32];   sprintf(head_info,"%d %d ",fill_count,save_length);   WriteFile("encoded_content.txt",head_info,strlen(head_info));   WriteFile("encoded_content.txt",save_text,save_length,true);   delete save_text;   save_text=ReadFile("encoded_content.txt",fill_count,save_length);//read fill_count、save_length、encoded char text from file      char* bin_text= CharTextToBin(save_text,fill_count,save_length);//convert char text to bin text   delete save_text;      char* decode_content=Decode(bin_text,root);//decode by bin_text and tree   cout<<decode_content<<endl;   delete bin_text;   delete decode_content;         PostOrderTraversal(root);//release tree      return 0; } 

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持武林网。

发表评论 共有条评论
用户名: 密码:
验证码: 匿名发表